1 //=- WebAssemblyFixIrreducibleControlFlow.cpp - Fix irreducible control flow -// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file implements a pass that removes irreducible control flow. 11 /// Irreducible control flow means multiple-entry loops, which this pass 12 /// transforms to have a single entry. 13 /// 14 /// Note that LLVM has a generic pass that lowers irreducible control flow, but 15 /// it linearizes control flow, turning diamonds into two triangles, which is 16 /// both unnecessary and undesirable for WebAssembly. 17 /// 18 /// The big picture: We recursively process each "region", defined as a group 19 /// of blocks with a single entry and no branches back to that entry. A region 20 /// may be the entire function body, or the inner part of a loop, i.e., the 21 /// loop's body without branches back to the loop entry. In each region we fix 22 /// up multi-entry loops by adding a new block that can dispatch to each of the 23 /// loop entries, based on the value of a label "helper" variable, and we 24 /// replace direct branches to the entries with assignments to the label 25 /// variable and a branch to the dispatch block. Then the dispatch block is the 26 /// single entry in the loop containing the previous multiple entries. After 27 /// ensuring all the loops in a region are reducible, we recurse into them. The 28 /// total time complexity of this pass is: 29 /// 30 /// O(NumBlocks * NumNestedLoops * NumIrreducibleLoops + 31 /// NumLoops * NumLoops) 32 /// 33 /// This pass is similar to what the Relooper [1] does. Both identify looping 34 /// code that requires multiple entries, and resolve it in a similar way (in 35 /// Relooper terminology, we implement a Multiple shape in a Loop shape). Note 36 /// also that like the Relooper, we implement a "minimal" intervention: we only 37 /// use the "label" helper for the blocks we absolutely must and no others. We 38 /// also prioritize code size and do not duplicate code in order to resolve 39 /// irreducibility. The graph algorithms for finding loops and entries and so 40 /// forth are also similar to the Relooper. The main differences between this 41 /// pass and the Relooper are: 42 /// 43 /// * We just care about irreducibility, so we just look at loops. 44 /// * The Relooper emits structured control flow (with ifs etc.), while we 45 /// emit a CFG. 46 /// 47 /// [1] Alon Zakai. 2011. Emscripten: an LLVM-to-JavaScript compiler. In 48 /// Proceedings of the ACM international conference companion on Object oriented 49 /// programming systems languages and applications companion (SPLASH '11). ACM, 50 /// New York, NY, USA, 301-312. DOI=10.1145/2048147.2048224 51 /// http://doi.acm.org/10.1145/2048147.2048224 52 /// 53 //===----------------------------------------------------------------------===// 54 55 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" 56 #include "WebAssembly.h" 57 #include "WebAssemblySubtarget.h" 58 #include "llvm/CodeGen/MachineFunctionPass.h" 59 #include "llvm/CodeGen/MachineInstrBuilder.h" 60 #include "llvm/Support/Debug.h" 61 using namespace llvm; 62 63 #define DEBUG_TYPE "wasm-fix-irreducible-control-flow" 64 65 namespace { 66 67 using BlockVector = SmallVector<MachineBasicBlock *, 4>; 68 using BlockSet = SmallPtrSet<MachineBasicBlock *, 4>; 69 70 static BlockVector getSortedEntries(const BlockSet &Entries) { 71 BlockVector SortedEntries(Entries.begin(), Entries.end()); 72 llvm::sort(SortedEntries, 73 [](const MachineBasicBlock *A, const MachineBasicBlock *B) { 74 auto ANum = A->getNumber(); 75 auto BNum = B->getNumber(); 76 return ANum < BNum; 77 }); 78 return SortedEntries; 79 } 80 81 // Calculates reachability in a region. Ignores branches to blocks outside of 82 // the region, and ignores branches to the region entry (for the case where 83 // the region is the inner part of a loop). 84 class ReachabilityGraph { 85 public: 86 ReachabilityGraph(MachineBasicBlock *Entry, const BlockSet &Blocks) 87 : Entry(Entry), Blocks(Blocks) { 88 #ifndef NDEBUG 89 // The region must have a single entry. 90 for (auto *MBB : Blocks) { 91 if (MBB != Entry) { 92 for (auto *Pred : MBB->predecessors()) { 93 assert(inRegion(Pred)); 94 } 95 } 96 } 97 #endif 98 calculate(); 99 } 100 101 bool canReach(MachineBasicBlock *From, MachineBasicBlock *To) const { 102 assert(inRegion(From) && inRegion(To)); 103 auto I = Reachable.find(From); 104 if (I == Reachable.end()) 105 return false; 106 return I->second.count(To); 107 } 108 109 // "Loopers" are blocks that are in a loop. We detect these by finding blocks 110 // that can reach themselves. 111 const BlockSet &getLoopers() const { return Loopers; } 112 113 // Get all blocks that are loop entries. 114 const BlockSet &getLoopEntries() const { return LoopEntries; } 115 116 // Get all blocks that enter a particular loop from outside. 117 const BlockSet &getLoopEnterers(MachineBasicBlock *LoopEntry) const { 118 assert(inRegion(LoopEntry)); 119 auto I = LoopEnterers.find(LoopEntry); 120 assert(I != LoopEnterers.end()); 121 return I->second; 122 } 123 124 private: 125 MachineBasicBlock *Entry; 126 const BlockSet &Blocks; 127 128 BlockSet Loopers, LoopEntries; 129 DenseMap<MachineBasicBlock *, BlockSet> LoopEnterers; 130 131 bool inRegion(MachineBasicBlock *MBB) const { return Blocks.count(MBB); } 132 133 // Maps a block to all the other blocks it can reach. 134 DenseMap<MachineBasicBlock *, BlockSet> Reachable; 135 136 void calculate() { 137 // Reachability computation work list. Contains pairs of recent additions 138 // (A, B) where we just added a link A => B. 139 using BlockPair = std::pair<MachineBasicBlock *, MachineBasicBlock *>; 140 SmallVector<BlockPair, 4> WorkList; 141 142 // Add all relevant direct branches. 143 for (auto *MBB : Blocks) { 144 for (auto *Succ : MBB->successors()) { 145 if (Succ != Entry && inRegion(Succ)) { 146 Reachable[MBB].insert(Succ); 147 WorkList.emplace_back(MBB, Succ); 148 } 149 } 150 } 151 152 while (!WorkList.empty()) { 153 MachineBasicBlock *MBB, *Succ; 154 std::tie(MBB, Succ) = WorkList.pop_back_val(); 155 assert(inRegion(MBB) && Succ != Entry && inRegion(Succ)); 156 if (MBB != Entry) { 157 // We recently added MBB => Succ, and that means we may have enabled 158 // Pred => MBB => Succ. 159 for (auto *Pred : MBB->predecessors()) { 160 if (Reachable[Pred].insert(Succ).second) { 161 WorkList.emplace_back(Pred, Succ); 162 } 163 } 164 } 165 } 166 167 // Blocks that can return to themselves are in a loop. 168 for (auto *MBB : Blocks) { 169 if (canReach(MBB, MBB)) { 170 Loopers.insert(MBB); 171 } 172 } 173 assert(!Loopers.count(Entry)); 174 175 // Find the loop entries - loopers reachable from blocks not in that loop - 176 // and those outside blocks that reach them, the "loop enterers". 177 for (auto *Looper : Loopers) { 178 for (auto *Pred : Looper->predecessors()) { 179 // Pred can reach Looper. If Looper can reach Pred, it is in the loop; 180 // otherwise, it is a block that enters into the loop. 181 if (!canReach(Looper, Pred)) { 182 LoopEntries.insert(Looper); 183 LoopEnterers[Looper].insert(Pred); 184 } 185 } 186 } 187 } 188 }; 189 190 // Finds the blocks in a single-entry loop, given the loop entry and the 191 // list of blocks that enter the loop. 192 class LoopBlocks { 193 public: 194 LoopBlocks(MachineBasicBlock *Entry, const BlockSet &Enterers) 195 : Entry(Entry), Enterers(Enterers) { 196 calculate(); 197 } 198 199 BlockSet &getBlocks() { return Blocks; } 200 201 private: 202 MachineBasicBlock *Entry; 203 const BlockSet &Enterers; 204 205 BlockSet Blocks; 206 207 void calculate() { 208 // Going backwards from the loop entry, if we ignore the blocks entering 209 // from outside, we will traverse all the blocks in the loop. 210 BlockVector WorkList; 211 BlockSet AddedToWorkList; 212 Blocks.insert(Entry); 213 for (auto *Pred : Entry->predecessors()) { 214 if (!Enterers.count(Pred)) { 215 WorkList.push_back(Pred); 216 AddedToWorkList.insert(Pred); 217 } 218 } 219 220 while (!WorkList.empty()) { 221 auto *MBB = WorkList.pop_back_val(); 222 assert(!Enterers.count(MBB)); 223 if (Blocks.insert(MBB).second) { 224 for (auto *Pred : MBB->predecessors()) { 225 if (AddedToWorkList.insert(Pred).second) 226 WorkList.push_back(Pred); 227 } 228 } 229 } 230 } 231 }; 232 233 class WebAssemblyFixIrreducibleControlFlow final : public MachineFunctionPass { 234 StringRef getPassName() const override { 235 return "WebAssembly Fix Irreducible Control Flow"; 236 } 237 238 bool runOnMachineFunction(MachineFunction &MF) override; 239 240 bool processRegion(MachineBasicBlock *Entry, BlockSet &Blocks, 241 MachineFunction &MF); 242 243 void makeSingleEntryLoop(BlockSet &Entries, BlockSet &Blocks, 244 MachineFunction &MF, const ReachabilityGraph &Graph); 245 246 public: 247 static char ID; // Pass identification, replacement for typeid 248 WebAssemblyFixIrreducibleControlFlow() : MachineFunctionPass(ID) {} 249 }; 250 251 bool WebAssemblyFixIrreducibleControlFlow::processRegion( 252 MachineBasicBlock *Entry, BlockSet &Blocks, MachineFunction &MF) { 253 bool Changed = false; 254 // Remove irreducibility before processing child loops, which may take 255 // multiple iterations. 256 while (true) { 257 ReachabilityGraph Graph(Entry, Blocks); 258 259 bool FoundIrreducibility = false; 260 261 for (auto *LoopEntry : getSortedEntries(Graph.getLoopEntries())) { 262 // Find mutual entries - all entries which can reach this one, and 263 // are reached by it (that always includes LoopEntry itself). All mutual 264 // entries must be in the same loop, so if we have more than one, then we 265 // have irreducible control flow. 266 // 267 // (Note that we need to sort the entries here, as otherwise the order can 268 // matter: being mutual is a symmetric relationship, and each set of 269 // mutuals will be handled properly no matter which we see first. However, 270 // there can be multiple disjoint sets of mutuals, and which we process 271 // first changes the output.) 272 // 273 // Note that irreducibility may involve inner loops, e.g. imagine A 274 // starts one loop, and it has B inside it which starts an inner loop. 275 // If we add a branch from all the way on the outside to B, then in a 276 // sense B is no longer an "inner" loop, semantically speaking. We will 277 // fix that irreducibility by adding a block that dispatches to either 278 // either A or B, so B will no longer be an inner loop in our output. 279 // (A fancier approach might try to keep it as such.) 280 // 281 // Note that we still need to recurse into inner loops later, to handle 282 // the case where the irreducibility is entirely nested - we would not 283 // be able to identify that at this point, since the enclosing loop is 284 // a group of blocks all of whom can reach each other. (We'll see the 285 // irreducibility after removing branches to the top of that enclosing 286 // loop.) 287 BlockSet MutualLoopEntries; 288 MutualLoopEntries.insert(LoopEntry); 289 for (auto *OtherLoopEntry : Graph.getLoopEntries()) { 290 if (OtherLoopEntry != LoopEntry && 291 Graph.canReach(LoopEntry, OtherLoopEntry) && 292 Graph.canReach(OtherLoopEntry, LoopEntry)) { 293 MutualLoopEntries.insert(OtherLoopEntry); 294 } 295 } 296 297 if (MutualLoopEntries.size() > 1) { 298 makeSingleEntryLoop(MutualLoopEntries, Blocks, MF, Graph); 299 FoundIrreducibility = true; 300 Changed = true; 301 break; 302 } 303 } 304 // Only go on to actually process the inner loops when we are done 305 // removing irreducible control flow and changing the graph. Modifying 306 // the graph as we go is possible, and that might let us avoid looking at 307 // the already-fixed loops again if we are careful, but all that is 308 // complex and bug-prone. Since irreducible loops are rare, just starting 309 // another iteration is best. 310 if (FoundIrreducibility) { 311 continue; 312 } 313 314 for (auto *LoopEntry : Graph.getLoopEntries()) { 315 LoopBlocks InnerBlocks(LoopEntry, Graph.getLoopEnterers(LoopEntry)); 316 // Each of these calls to processRegion may change the graph, but are 317 // guaranteed not to interfere with each other. The only changes we make 318 // to the graph are to add blocks on the way to a loop entry. As the 319 // loops are disjoint, that means we may only alter branches that exit 320 // another loop, which are ignored when recursing into that other loop 321 // anyhow. 322 if (processRegion(LoopEntry, InnerBlocks.getBlocks(), MF)) { 323 Changed = true; 324 } 325 } 326 327 return Changed; 328 } 329 } 330 331 // Given a set of entries to a single loop, create a single entry for that 332 // loop by creating a dispatch block for them, routing control flow using 333 // a helper variable. Also updates Blocks with any new blocks created, so 334 // that we properly track all the blocks in the region. But this does not update 335 // ReachabilityGraph; this will be updated in the caller of this function as 336 // needed. 337 void WebAssemblyFixIrreducibleControlFlow::makeSingleEntryLoop( 338 BlockSet &Entries, BlockSet &Blocks, MachineFunction &MF, 339 const ReachabilityGraph &Graph) { 340 assert(Entries.size() >= 2); 341 342 // Sort the entries to ensure a deterministic build. 343 BlockVector SortedEntries = getSortedEntries(Entries); 344 345 #ifndef NDEBUG 346 for (auto Block : SortedEntries) 347 assert(Block->getNumber() != -1); 348 if (SortedEntries.size() > 1) { 349 for (auto I = SortedEntries.begin(), E = SortedEntries.end() - 1; I != E; 350 ++I) { 351 auto ANum = (*I)->getNumber(); 352 auto BNum = (*(std::next(I)))->getNumber(); 353 assert(ANum != BNum); 354 } 355 } 356 #endif 357 358 // Create a dispatch block which will contain a jump table to the entries. 359 MachineBasicBlock *Dispatch = MF.CreateMachineBasicBlock(); 360 MF.insert(MF.end(), Dispatch); 361 Blocks.insert(Dispatch); 362 363 // Add the jump table. 364 const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); 365 MachineInstrBuilder MIB = 366 BuildMI(Dispatch, DebugLoc(), TII.get(WebAssembly::BR_TABLE_I32)); 367 368 // Add the register which will be used to tell the jump table which block to 369 // jump to. 370 MachineRegisterInfo &MRI = MF.getRegInfo(); 371 Register Reg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); 372 MIB.addReg(Reg); 373 374 // Compute the indices in the superheader, one for each bad block, and 375 // add them as successors. 376 DenseMap<MachineBasicBlock *, unsigned> Indices; 377 for (auto *Entry : SortedEntries) { 378 auto Pair = Indices.insert(std::make_pair(Entry, 0)); 379 assert(Pair.second); 380 381 unsigned Index = MIB.getInstr()->getNumExplicitOperands() - 1; 382 Pair.first->second = Index; 383 384 MIB.addMBB(Entry); 385 Dispatch->addSuccessor(Entry); 386 } 387 388 // Rewrite the problematic successors for every block that wants to reach 389 // the bad blocks. For simplicity, we just introduce a new block for every 390 // edge we need to rewrite. (Fancier things are possible.) 391 392 BlockVector AllPreds; 393 for (auto *Entry : SortedEntries) { 394 for (auto *Pred : Entry->predecessors()) { 395 if (Pred != Dispatch) { 396 AllPreds.push_back(Pred); 397 } 398 } 399 } 400 401 // This set stores predecessors within this loop. 402 DenseSet<MachineBasicBlock *> InLoop; 403 for (auto *Pred : AllPreds) { 404 for (auto *Entry : Pred->successors()) { 405 if (!Entries.count(Entry)) 406 continue; 407 if (Graph.canReach(Entry, Pred)) { 408 InLoop.insert(Pred); 409 break; 410 } 411 } 412 } 413 414 // Record if each entry has a layout predecessor. This map stores 415 // <<loop entry, Predecessor is within the loop?>, layout predecessor> 416 DenseMap<PointerIntPair<MachineBasicBlock *, 1, bool>, MachineBasicBlock *> 417 EntryToLayoutPred; 418 for (auto *Pred : AllPreds) { 419 bool PredInLoop = InLoop.count(Pred); 420 for (auto *Entry : Pred->successors()) 421 if (Entries.count(Entry) && Pred->isLayoutSuccessor(Entry)) 422 EntryToLayoutPred[{Entry, PredInLoop}] = Pred; 423 } 424 425 // We need to create at most two routing blocks per entry: one for 426 // predecessors outside the loop and one for predecessors inside the loop. 427 // This map stores 428 // <<loop entry, Predecessor is within the loop?>, routing block> 429 DenseMap<PointerIntPair<MachineBasicBlock *, 1, bool>, MachineBasicBlock *> 430 Map; 431 for (auto *Pred : AllPreds) { 432 bool PredInLoop = InLoop.count(Pred); 433 for (auto *Entry : Pred->successors()) { 434 if (!Entries.count(Entry) || Map.count({Entry, PredInLoop})) 435 continue; 436 // If there exists a layout predecessor of this entry and this predecessor 437 // is not that, we rather create a routing block after that layout 438 // predecessor to save a branch. 439 if (auto *OtherPred = EntryToLayoutPred.lookup({Entry, PredInLoop})) 440 if (OtherPred != Pred) 441 continue; 442 443 // This is a successor we need to rewrite. 444 MachineBasicBlock *Routing = MF.CreateMachineBasicBlock(); 445 MF.insert(Pred->isLayoutSuccessor(Entry) 446 ? MachineFunction::iterator(Entry) 447 : MF.end(), 448 Routing); 449 Blocks.insert(Routing); 450 451 // Set the jump table's register of the index of the block we wish to 452 // jump to, and jump to the jump table. 453 BuildMI(Routing, DebugLoc(), TII.get(WebAssembly::CONST_I32), Reg) 454 .addImm(Indices[Entry]); 455 BuildMI(Routing, DebugLoc(), TII.get(WebAssembly::BR)).addMBB(Dispatch); 456 Routing->addSuccessor(Dispatch); 457 Map[{Entry, PredInLoop}] = Routing; 458 } 459 } 460 461 for (auto *Pred : AllPreds) { 462 bool PredInLoop = InLoop.count(Pred); 463 // Remap the terminator operands and the successor list. 464 for (MachineInstr &Term : Pred->terminators()) 465 for (auto &Op : Term.explicit_uses()) 466 if (Op.isMBB() && Indices.count(Op.getMBB())) 467 Op.setMBB(Map[{Op.getMBB(), PredInLoop}]); 468 469 for (auto *Succ : Pred->successors()) { 470 if (!Entries.count(Succ)) 471 continue; 472 auto *Routing = Map[{Succ, PredInLoop}]; 473 Pred->replaceSuccessor(Succ, Routing); 474 } 475 } 476 477 // Create a fake default label, because br_table requires one. 478 MIB.addMBB(MIB.getInstr() 479 ->getOperand(MIB.getInstr()->getNumExplicitOperands() - 1) 480 .getMBB()); 481 } 482 483 } // end anonymous namespace 484 485 char WebAssemblyFixIrreducibleControlFlow::ID = 0; 486 INITIALIZE_PASS(WebAssemblyFixIrreducibleControlFlow, DEBUG_TYPE, 487 "Removes irreducible control flow", false, false) 488 489 FunctionPass *llvm::createWebAssemblyFixIrreducibleControlFlow() { 490 return new WebAssemblyFixIrreducibleControlFlow(); 491 } 492 493 // Test whether the given register has an ARGUMENT def. 494 static bool hasArgumentDef(unsigned Reg, const MachineRegisterInfo &MRI) { 495 for (const auto &Def : MRI.def_instructions(Reg)) 496 if (WebAssembly::isArgument(Def.getOpcode())) 497 return true; 498 return false; 499 } 500 501 // Add a register definition with IMPLICIT_DEFs for every register to cover for 502 // register uses that don't have defs in every possible path. 503 // TODO: This is fairly heavy-handed; find a better approach. 504 static void addImplicitDefs(MachineFunction &MF) { 505 const MachineRegisterInfo &MRI = MF.getRegInfo(); 506 const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); 507 MachineBasicBlock &Entry = *MF.begin(); 508 for (unsigned I = 0, E = MRI.getNumVirtRegs(); I < E; ++I) { 509 Register Reg = Register::index2VirtReg(I); 510 511 // Skip unused registers. 512 if (MRI.use_nodbg_empty(Reg)) 513 continue; 514 515 // Skip registers that have an ARGUMENT definition. 516 if (hasArgumentDef(Reg, MRI)) 517 continue; 518 519 BuildMI(Entry, Entry.begin(), DebugLoc(), 520 TII.get(WebAssembly::IMPLICIT_DEF), Reg); 521 } 522 523 // Move ARGUMENT_* instructions to the top of the entry block, so that their 524 // liveness reflects the fact that these really are live-in values. 525 for (MachineInstr &MI : llvm::make_early_inc_range(Entry)) { 526 if (WebAssembly::isArgument(MI.getOpcode())) { 527 MI.removeFromParent(); 528 Entry.insert(Entry.begin(), &MI); 529 } 530 } 531 } 532 533 bool WebAssemblyFixIrreducibleControlFlow::runOnMachineFunction( 534 MachineFunction &MF) { 535 LLVM_DEBUG(dbgs() << "********** Fixing Irreducible Control Flow **********\n" 536 "********** Function: " 537 << MF.getName() << '\n'); 538 539 // Start the recursive process on the entire function body. 540 BlockSet AllBlocks; 541 for (auto &MBB : MF) { 542 AllBlocks.insert(&MBB); 543 } 544 545 if (LLVM_UNLIKELY(processRegion(&*MF.begin(), AllBlocks, MF))) { 546 // We rewrote part of the function; recompute relevant things. 547 MF.RenumberBlocks(); 548 // Now we've inserted dispatch blocks, some register uses can have incoming 549 // paths without a def. For example, before this pass register %a was 550 // defined in BB1 and used in BB2, and there was only one path from BB1 and 551 // BB2. But if this pass inserts a dispatch block having multiple 552 // predecessors between the two BBs, now there are paths to BB2 without 553 // visiting BB1, and %a's use in BB2 is not dominated by its def. Adding 554 // IMPLICIT_DEFs to all regs is one simple way to fix it. 555 addImplicitDefs(MF); 556 return true; 557 } 558 559 return false; 560 } 561