1 //=== WebAssemblyLateEHPrepare.cpp - WebAssembly Exception Preparation -===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// \brief Does various transformations for exception handling. 11 /// 12 //===----------------------------------------------------------------------===// 13 14 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" 15 #include "WebAssembly.h" 16 #include "WebAssemblySubtarget.h" 17 #include "WebAssemblyUtilities.h" 18 #include "llvm/ADT/SmallPtrSet.h" 19 #include "llvm/CodeGen/MachineInstrBuilder.h" 20 #include "llvm/CodeGen/WasmEHFuncInfo.h" 21 #include "llvm/MC/MCAsmInfo.h" 22 #include "llvm/Support/Debug.h" 23 #include "llvm/Target/TargetMachine.h" 24 using namespace llvm; 25 26 #define DEBUG_TYPE "wasm-late-eh-prepare" 27 28 namespace { 29 class WebAssemblyLateEHPrepare final : public MachineFunctionPass { 30 StringRef getPassName() const override { 31 return "WebAssembly Late Prepare Exception"; 32 } 33 34 bool runOnMachineFunction(MachineFunction &MF) override; 35 bool removeUnreachableEHPads(MachineFunction &MF); 36 void recordCatchRetBBs(MachineFunction &MF); 37 bool hoistCatches(MachineFunction &MF); 38 bool addCatchAlls(MachineFunction &MF); 39 bool replaceFuncletReturns(MachineFunction &MF); 40 bool removeUnnecessaryUnreachables(MachineFunction &MF); 41 bool ensureSingleBBTermPads(MachineFunction &MF); 42 bool restoreStackPointer(MachineFunction &MF); 43 44 MachineBasicBlock *getMatchingEHPad(MachineInstr *MI); 45 SmallPtrSet<MachineBasicBlock *, 8> CatchRetBBs; 46 47 public: 48 static char ID; // Pass identification, replacement for typeid 49 WebAssemblyLateEHPrepare() : MachineFunctionPass(ID) {} 50 }; 51 } // end anonymous namespace 52 53 char WebAssemblyLateEHPrepare::ID = 0; 54 INITIALIZE_PASS(WebAssemblyLateEHPrepare, DEBUG_TYPE, 55 "WebAssembly Late Exception Preparation", false, false) 56 57 FunctionPass *llvm::createWebAssemblyLateEHPrepare() { 58 return new WebAssemblyLateEHPrepare(); 59 } 60 61 // Returns the nearest EH pad that dominates this instruction. This does not use 62 // dominator analysis; it just does BFS on its predecessors until arriving at an 63 // EH pad. This assumes valid EH scopes so the first EH pad it arrives in all 64 // possible search paths should be the same. 65 // Returns nullptr in case it does not find any EH pad in the search, or finds 66 // multiple different EH pads. 67 MachineBasicBlock * 68 WebAssemblyLateEHPrepare::getMatchingEHPad(MachineInstr *MI) { 69 MachineFunction *MF = MI->getParent()->getParent(); 70 SmallVector<MachineBasicBlock *, 2> WL; 71 SmallPtrSet<MachineBasicBlock *, 2> Visited; 72 WL.push_back(MI->getParent()); 73 MachineBasicBlock *EHPad = nullptr; 74 while (!WL.empty()) { 75 MachineBasicBlock *MBB = WL.pop_back_val(); 76 if (Visited.count(MBB)) 77 continue; 78 Visited.insert(MBB); 79 if (MBB->isEHPad()) { 80 if (EHPad && EHPad != MBB) 81 return nullptr; 82 EHPad = MBB; 83 continue; 84 } 85 if (MBB == &MF->front()) 86 return nullptr; 87 for (auto *Pred : MBB->predecessors()) 88 if (!CatchRetBBs.count(Pred)) // We don't go into child scopes 89 WL.push_back(Pred); 90 } 91 return EHPad; 92 } 93 94 // Erase the specified BBs if the BB does not have any remaining predecessors, 95 // and also all its dead children. 96 template <typename Container> 97 static void eraseDeadBBsAndChildren(const Container &MBBs) { 98 SmallVector<MachineBasicBlock *, 8> WL(MBBs.begin(), MBBs.end()); 99 SmallPtrSet<MachineBasicBlock *, 8> Deleted; 100 while (!WL.empty()) { 101 MachineBasicBlock *MBB = WL.pop_back_val(); 102 if (Deleted.count(MBB) || !MBB->pred_empty()) 103 continue; 104 SmallVector<MachineBasicBlock *, 4> Succs(MBB->successors()); 105 WL.append(MBB->succ_begin(), MBB->succ_end()); 106 for (auto *Succ : Succs) 107 MBB->removeSuccessor(Succ); 108 // To prevent deleting the same BB multiple times, which can happen when 109 // 'MBBs' contain both a parent and a child 110 Deleted.insert(MBB); 111 MBB->eraseFromParent(); 112 } 113 } 114 115 bool WebAssemblyLateEHPrepare::runOnMachineFunction(MachineFunction &MF) { 116 LLVM_DEBUG(dbgs() << "********** Late EH Prepare **********\n" 117 "********** Function: " 118 << MF.getName() << '\n'); 119 120 if (MF.getTarget().getMCAsmInfo()->getExceptionHandlingType() != 121 ExceptionHandling::Wasm) 122 return false; 123 124 bool Changed = false; 125 if (MF.getFunction().hasPersonalityFn()) { 126 Changed |= removeUnreachableEHPads(MF); 127 recordCatchRetBBs(MF); 128 Changed |= hoistCatches(MF); 129 Changed |= addCatchAlls(MF); 130 Changed |= replaceFuncletReturns(MF); 131 Changed |= ensureSingleBBTermPads(MF); 132 } 133 Changed |= removeUnnecessaryUnreachables(MF); 134 if (MF.getFunction().hasPersonalityFn()) 135 Changed |= restoreStackPointer(MF); 136 return Changed; 137 } 138 139 // Remove unreachable EH pads and its children. If they remain, CFG 140 // stackification can be tricky. 141 bool WebAssemblyLateEHPrepare::removeUnreachableEHPads(MachineFunction &MF) { 142 SmallVector<MachineBasicBlock *, 4> ToDelete; 143 for (auto &MBB : MF) 144 if (MBB.isEHPad() && MBB.pred_empty()) 145 ToDelete.push_back(&MBB); 146 eraseDeadBBsAndChildren(ToDelete); 147 return !ToDelete.empty(); 148 } 149 150 // Record which BB ends with catchret instruction, because this will be replaced 151 // with 'br's later. This set of catchret BBs is necessary in 'getMatchingEHPad' 152 // function. 153 void WebAssemblyLateEHPrepare::recordCatchRetBBs(MachineFunction &MF) { 154 CatchRetBBs.clear(); 155 for (auto &MBB : MF) { 156 auto Pos = MBB.getFirstTerminator(); 157 if (Pos == MBB.end()) 158 continue; 159 MachineInstr *TI = &*Pos; 160 if (TI->getOpcode() == WebAssembly::CATCHRET) 161 CatchRetBBs.insert(&MBB); 162 } 163 } 164 165 // Hoist catch instructions to the beginning of their matching EH pad BBs in 166 // case, 167 // (1) catch instruction is not the first instruction in EH pad. 168 // ehpad: 169 // some_other_instruction 170 // ... 171 // %exn = catch 0 172 // (2) catch instruction is in a non-EH pad BB. For example, 173 // ehpad: 174 // br bb0 175 // bb0: 176 // %exn = catch 0 177 bool WebAssemblyLateEHPrepare::hoistCatches(MachineFunction &MF) { 178 bool Changed = false; 179 SmallVector<MachineInstr *, 16> Catches; 180 for (auto &MBB : MF) 181 for (auto &MI : MBB) 182 if (WebAssembly::isCatch(MI.getOpcode())) 183 Catches.push_back(&MI); 184 185 for (auto *Catch : Catches) { 186 MachineBasicBlock *EHPad = getMatchingEHPad(Catch); 187 assert(EHPad && "No matching EH pad for catch"); 188 auto InsertPos = EHPad->begin(); 189 // Skip EH_LABELs in the beginning of an EH pad if present. We don't use 190 // these labels at the moment, but other targets also seem to have an 191 // EH_LABEL instruction in the beginning of an EH pad. 192 while (InsertPos != EHPad->end() && InsertPos->isEHLabel()) 193 InsertPos++; 194 if (InsertPos == Catch) 195 continue; 196 Changed = true; 197 EHPad->insert(InsertPos, Catch->removeFromParent()); 198 } 199 return Changed; 200 } 201 202 // Add catch_all to beginning of cleanup pads. 203 bool WebAssemblyLateEHPrepare::addCatchAlls(MachineFunction &MF) { 204 bool Changed = false; 205 const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); 206 207 for (auto &MBB : MF) { 208 if (!MBB.isEHPad()) 209 continue; 210 auto InsertPos = MBB.begin(); 211 // Skip EH_LABELs in the beginning of an EH pad if present. 212 while (InsertPos != MBB.end() && InsertPos->isEHLabel()) 213 InsertPos++; 214 // This runs after hoistCatches(), so we assume that if there is a catch, 215 // that should be the non-EH label first instruction in an EH pad. 216 if (InsertPos == MBB.end() || 217 !WebAssembly::isCatch(InsertPos->getOpcode())) { 218 Changed = true; 219 BuildMI(MBB, InsertPos, InsertPos->getDebugLoc(), 220 TII.get(WebAssembly::CATCH_ALL)); 221 } 222 } 223 return Changed; 224 } 225 226 // Replace pseudo-instructions catchret and cleanupret with br and rethrow 227 // respectively. 228 bool WebAssemblyLateEHPrepare::replaceFuncletReturns(MachineFunction &MF) { 229 bool Changed = false; 230 const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); 231 232 for (auto &MBB : MF) { 233 auto Pos = MBB.getFirstTerminator(); 234 if (Pos == MBB.end()) 235 continue; 236 MachineInstr *TI = &*Pos; 237 238 switch (TI->getOpcode()) { 239 case WebAssembly::CATCHRET: { 240 // Replace a catchret with a branch 241 MachineBasicBlock *TBB = TI->getOperand(0).getMBB(); 242 if (!MBB.isLayoutSuccessor(TBB)) 243 BuildMI(MBB, TI, TI->getDebugLoc(), TII.get(WebAssembly::BR)) 244 .addMBB(TBB); 245 TI->eraseFromParent(); 246 Changed = true; 247 break; 248 } 249 case WebAssembly::CLEANUPRET: { 250 // Replace a cleanupret with a rethrow. For C++ support, currently 251 // rethrow's immediate argument is always 0 (= the latest exception). 252 BuildMI(MBB, TI, TI->getDebugLoc(), TII.get(WebAssembly::RETHROW)) 253 .addImm(0); 254 TI->eraseFromParent(); 255 Changed = true; 256 break; 257 } 258 } 259 } 260 return Changed; 261 } 262 263 // Remove unnecessary unreachables after a throw or rethrow. 264 bool WebAssemblyLateEHPrepare::removeUnnecessaryUnreachables( 265 MachineFunction &MF) { 266 bool Changed = false; 267 for (auto &MBB : MF) { 268 for (auto &MI : MBB) { 269 if (MI.getOpcode() != WebAssembly::THROW && 270 MI.getOpcode() != WebAssembly::RETHROW) 271 continue; 272 Changed = true; 273 274 // The instruction after the throw should be an unreachable or a branch to 275 // another BB that should eventually lead to an unreachable. Delete it 276 // because throw itself is a terminator, and also delete successors if 277 // any. 278 MBB.erase(std::next(MI.getIterator()), MBB.end()); 279 SmallVector<MachineBasicBlock *, 8> Succs(MBB.successors()); 280 for (auto *Succ : Succs) 281 if (!Succ->isEHPad()) 282 MBB.removeSuccessor(Succ); 283 eraseDeadBBsAndChildren(Succs); 284 } 285 } 286 287 return Changed; 288 } 289 290 // Clang-generated terminate pads are an single-BB EH pad in the form of 291 // termpad: 292 // %exn = catch $__cpp_exception 293 // call @__clang_call_terminate(%exn) 294 // unreachable 295 // (There can be local.set and local.gets before the call if we didn't run 296 // RegStackify) 297 // But code transformations can change or add more control flow, so the call to 298 // __clang_call_terminate() function may not be in the original EH pad anymore. 299 // This ensures every terminate pad is a single BB in the form illustrated 300 // above. 301 // 302 // This is preparation work for the HandleEHTerminatePads pass later, which 303 // duplicates terminate pads both for 'catch' and 'catch_all'. Refer to 304 // WebAssemblyHandleEHTerminatePads.cpp for details. 305 bool WebAssemblyLateEHPrepare::ensureSingleBBTermPads(MachineFunction &MF) { 306 const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); 307 308 // Find calls to __clang_call_terminate() 309 SmallVector<MachineInstr *, 8> ClangCallTerminateCalls; 310 SmallPtrSet<MachineBasicBlock *, 8> TermPads; 311 for (auto &MBB : MF) { 312 for (auto &MI : MBB) { 313 if (MI.isCall()) { 314 const MachineOperand &CalleeOp = MI.getOperand(0); 315 if (CalleeOp.isGlobal() && CalleeOp.getGlobal()->getName() == 316 WebAssembly::ClangCallTerminateFn) { 317 MachineBasicBlock *EHPad = getMatchingEHPad(&MI); 318 assert(EHPad && "No matching EH pad for __clang_call_terminate"); 319 // In case a __clang_call_terminate call is duplicated during code 320 // transformation so one terminate pad contains multiple 321 // __clang_call_terminate calls, we only count one of them 322 if (TermPads.insert(EHPad).second) 323 ClangCallTerminateCalls.push_back(&MI); 324 } 325 } 326 } 327 } 328 329 bool Changed = false; 330 for (auto *Call : ClangCallTerminateCalls) { 331 MachineBasicBlock *EHPad = getMatchingEHPad(Call); 332 assert(EHPad && "No matching EH pad for __clang_call_terminate"); 333 334 // If it is already the form we want, skip it 335 if (Call->getParent() == EHPad && 336 Call->getNextNode()->getOpcode() == WebAssembly::UNREACHABLE) 337 continue; 338 339 // In case the __clang_call_terminate() call is not in its matching EH pad, 340 // move the call to the end of EH pad and add an unreachable instruction 341 // after that. Delete all successors and their children if any, because here 342 // the program terminates. 343 Changed = true; 344 // This runs after hoistCatches(), so catch instruction should be at the top 345 MachineInstr *Catch = WebAssembly::findCatch(EHPad); 346 assert(Catch && "EH pad does not have a catch instruction"); 347 // Takes the result register of the catch instruction as argument. There may 348 // have been some other local.set/local.gets in between, but at this point 349 // we don't care. 350 Call->getOperand(1).setReg(Catch->getOperand(0).getReg()); 351 auto InsertPos = std::next(MachineBasicBlock::iterator(Catch)); 352 EHPad->insert(InsertPos, Call->removeFromParent()); 353 BuildMI(*EHPad, InsertPos, Call->getDebugLoc(), 354 TII.get(WebAssembly::UNREACHABLE)); 355 EHPad->erase(InsertPos, EHPad->end()); 356 SmallVector<MachineBasicBlock *, 8> Succs(EHPad->successors()); 357 for (auto *Succ : Succs) 358 EHPad->removeSuccessor(Succ); 359 eraseDeadBBsAndChildren(Succs); 360 } 361 return Changed; 362 } 363 364 // After the stack is unwound due to a thrown exception, the __stack_pointer 365 // global can point to an invalid address. This inserts instructions that 366 // restore __stack_pointer global. 367 bool WebAssemblyLateEHPrepare::restoreStackPointer(MachineFunction &MF) { 368 const auto *FrameLowering = static_cast<const WebAssemblyFrameLowering *>( 369 MF.getSubtarget().getFrameLowering()); 370 if (!FrameLowering->needsPrologForEH(MF)) 371 return false; 372 bool Changed = false; 373 374 for (auto &MBB : MF) { 375 if (!MBB.isEHPad()) 376 continue; 377 Changed = true; 378 379 // Insert __stack_pointer restoring instructions at the beginning of each EH 380 // pad, after the catch instruction. Here it is safe to assume that SP32 381 // holds the latest value of __stack_pointer, because the only exception for 382 // this case is when a function uses the red zone, but that only happens 383 // with leaf functions, and we don't restore __stack_pointer in leaf 384 // functions anyway. 385 auto InsertPos = MBB.begin(); 386 if (InsertPos->isEHLabel()) // EH pad starts with an EH label 387 ++InsertPos; 388 if (WebAssembly::isCatch(InsertPos->getOpcode())) 389 ++InsertPos; 390 FrameLowering->writeSPToGlobal(FrameLowering->getSPReg(MF), MF, MBB, 391 InsertPos, MBB.begin()->getDebugLoc()); 392 } 393 return Changed; 394 } 395