xref: /freebsd/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp (revision 770cf0a5f02dc8983a89c6568d741fbc25baa999)
1 //=== WebAssemblyLowerEmscriptenEHSjLj.cpp - Lower exceptions for Emscripten =//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file lowers exception-related instructions and setjmp/longjmp function
11 /// calls to use Emscripten's library functions. The pass uses JavaScript's try
12 /// and catch mechanism in case of Emscripten EH/SjLj and Wasm EH intrinsics in
13 /// case of Wasm SjLj.
14 ///
15 /// * Emscripten exception handling
16 /// This pass lowers invokes and landingpads into library functions in JS glue
17 /// code. Invokes are lowered into function wrappers called invoke wrappers that
18 /// exist in JS side, which wraps the original function call with JS try-catch.
19 /// If an exception occurred, cxa_throw() function in JS side sets some
20 /// variables (see below) so we can check whether an exception occurred from
21 /// wasm code and handle it appropriately.
22 ///
23 /// * Emscripten setjmp-longjmp handling
24 /// This pass lowers setjmp to a reasonably-performant approach for emscripten.
25 /// The idea is that each block with a setjmp is broken up into two parts: the
26 /// part containing setjmp and the part right after the setjmp. The latter part
27 /// is either reached from the setjmp, or later from a longjmp. To handle the
28 /// longjmp, all calls that might longjmp are also called using invoke wrappers
29 /// and thus JS / try-catch. JS longjmp() function also sets some variables so
30 /// we can check whether a longjmp occurred from wasm code. Each block with a
31 /// function call that might longjmp is also split up after the longjmp call.
32 /// After the longjmp call, we check whether a longjmp occurred, and if it did,
33 /// which setjmp it corresponds to, and jump to the right post-setjmp block.
34 /// We assume setjmp-longjmp handling always runs after EH handling, which means
35 /// we don't expect any exception-related instructions when SjLj runs.
36 /// FIXME Currently this scheme does not support indirect call of setjmp,
37 /// because of the limitation of the scheme itself. fastcomp does not support it
38 /// either.
39 ///
40 /// In detail, this pass does the following things:
41 ///
42 /// 1) Assumes the existence of global variables: __THREW__, __threwValue
43 ///    __THREW__ and __threwValue are defined in compiler-rt in Emscripten.
44 ///    These variables are used for both exceptions and setjmp/longjmps.
45 ///    __THREW__ indicates whether an exception or a longjmp occurred or not. 0
46 ///    means nothing occurred, 1 means an exception occurred, and other numbers
47 ///    mean a longjmp occurred. In the case of longjmp, __THREW__ variable
48 ///    indicates the corresponding setjmp buffer the longjmp corresponds to.
49 ///    __threwValue is 0 for exceptions, and the argument to longjmp in case of
50 ///    longjmp.
51 ///
52 /// * Emscripten exception handling
53 ///
54 /// 2) We assume the existence of setThrew and setTempRet0/getTempRet0 functions
55 ///    at link time. setThrew exists in Emscripten's compiler-rt:
56 ///
57 ///    void setThrew(uintptr_t threw, int value) {
58 ///      if (__THREW__ == 0) {
59 ///        __THREW__ = threw;
60 ///        __threwValue = value;
61 ///      }
62 ///    }
63 ///
64 ///    setTempRet0 is called from __cxa_find_matching_catch() in JS glue code.
65 ///    In exception handling, getTempRet0 indicates the type of an exception
66 ///    caught, and in setjmp/longjmp, it means the second argument to longjmp
67 ///    function.
68 ///
69 /// 3) Lower
70 ///      invoke @func(arg1, arg2) to label %invoke.cont unwind label %lpad
71 ///    into
72 ///      __THREW__ = 0;
73 ///      call @__invoke_SIG(func, arg1, arg2)
74 ///      %__THREW__.val = __THREW__;
75 ///      __THREW__ = 0;
76 ///      if (%__THREW__.val == 1)
77 ///        goto %lpad
78 ///      else
79 ///         goto %invoke.cont
80 ///    SIG is a mangled string generated based on the LLVM IR-level function
81 ///    signature. After LLVM IR types are lowered to the target wasm types,
82 ///    the names for these wrappers will change based on wasm types as well,
83 ///    as in invoke_vi (function takes an int and returns void). The bodies of
84 ///    these wrappers will be generated in JS glue code, and inside those
85 ///    wrappers we use JS try-catch to generate actual exception effects. It
86 ///    also calls the original callee function. An example wrapper in JS code
87 ///    would look like this:
88 ///      function invoke_vi(index,a1) {
89 ///        try {
90 ///          Module["dynCall_vi"](index,a1); // This calls original callee
91 ///        } catch(e) {
92 ///          if (typeof e !== 'number' && e !== 'longjmp') throw e;
93 ///          _setThrew(1, 0); // setThrew is called here
94 ///        }
95 ///      }
96 ///    If an exception is thrown, __THREW__ will be set to true in a wrapper,
97 ///    so we can jump to the right BB based on this value.
98 ///
99 /// 4) Lower
100 ///      %val = landingpad catch c1 catch c2 catch c3 ...
101 ///      ... use %val ...
102 ///    into
103 ///      %fmc = call @__cxa_find_matching_catch_N(c1, c2, c3, ...)
104 ///      %val = {%fmc, getTempRet0()}
105 ///      ... use %val ...
106 ///    Here N is a number calculated based on the number of clauses.
107 ///    setTempRet0 is called from __cxa_find_matching_catch() in JS glue code.
108 ///
109 /// 5) Lower
110 ///      resume {%a, %b}
111 ///    into
112 ///      call @__resumeException(%a)
113 ///    where __resumeException() is a function in JS glue code.
114 ///
115 /// 6) Lower
116 ///      call @llvm.eh.typeid.for(type) (intrinsic)
117 ///    into
118 ///      call @llvm_eh_typeid_for(type)
119 ///    llvm_eh_typeid_for function will be generated in JS glue code.
120 ///
121 /// * Emscripten setjmp / longjmp handling
122 ///
123 /// If there are calls to longjmp()
124 ///
125 /// 1) Lower
126 ///      longjmp(env, val)
127 ///    into
128 ///      emscripten_longjmp(env, val)
129 ///
130 /// If there are calls to setjmp()
131 ///
132 /// 2) In the function entry that calls setjmp, initialize
133 ///    functionInvocationId as follows:
134 ///
135 ///    functionInvocationId = alloca(4)
136 ///
137 ///    Note: the alloca size is not important as this pointer is
138 ///    merely used for pointer comparisons.
139 ///
140 /// 3) Lower
141 ///      setjmp(env)
142 ///    into
143 ///      __wasm_setjmp(env, label, functionInvocationId)
144 ///
145 ///    __wasm_setjmp records the necessary info (the label and
146 ///    functionInvocationId) to the "env".
147 ///    A BB with setjmp is split into two after setjmp call in order to
148 ///    make the post-setjmp BB the possible destination of longjmp BB.
149 ///
150 /// 4) Lower every call that might longjmp into
151 ///      __THREW__ = 0;
152 ///      call @__invoke_SIG(func, arg1, arg2)
153 ///      %__THREW__.val = __THREW__;
154 ///      __THREW__ = 0;
155 ///      %__threwValue.val = __threwValue;
156 ///      if (%__THREW__.val != 0 & %__threwValue.val != 0) {
157 ///        %label = __wasm_setjmp_test(%__THREW__.val, functionInvocationId);
158 ///        if (%label == 0)
159 ///          emscripten_longjmp(%__THREW__.val, %__threwValue.val);
160 ///        setTempRet0(%__threwValue.val);
161 ///      } else {
162 ///        %label = -1;
163 ///      }
164 ///      longjmp_result = getTempRet0();
165 ///      switch %label {
166 ///        label 1: goto post-setjmp BB 1
167 ///        label 2: goto post-setjmp BB 2
168 ///        ...
169 ///        default: goto splitted next BB
170 ///      }
171 ///
172 ///    __wasm_setjmp_test examines the jmp buf to see if it was for a matching
173 ///    setjmp call. After calling an invoke wrapper, if a longjmp occurred,
174 ///    __THREW__ will be the address of matching jmp_buf buffer and
175 ///    __threwValue be the second argument to longjmp.
176 ///    __wasm_setjmp_test returns a setjmp label, a unique ID to each setjmp
177 ///    callsite. Label 0 means this longjmp buffer does not correspond to one
178 ///    of the setjmp callsites in this function, so in this case we just chain
179 ///    the longjmp to the caller. Label -1 means no longjmp occurred.
180 ///    Otherwise we jump to the right post-setjmp BB based on the label.
181 ///
182 /// * Wasm setjmp / longjmp handling
183 /// This mode still uses some Emscripten library functions but not JavaScript's
184 /// try-catch mechanism. It instead uses Wasm exception handling intrinsics,
185 /// which will be lowered to exception handling instructions.
186 ///
187 /// If there are calls to longjmp()
188 ///
189 /// 1) Lower
190 ///      longjmp(env, val)
191 ///    into
192 ///      __wasm_longjmp(env, val)
193 ///
194 /// If there are calls to setjmp()
195 ///
196 /// 2) and 3): The same as 2) and 3) in Emscripten SjLj.
197 /// (functionInvocationId initialization + setjmp callsite transformation)
198 ///
199 /// 4) Create a catchpad with a wasm.catch() intrinsic, which returns the value
200 /// thrown by __wasm_longjmp function. In the runtime library, we have an
201 /// equivalent of the following struct:
202 ///
203 /// struct __WasmLongjmpArgs {
204 ///   void *env;
205 ///   int val;
206 /// };
207 ///
208 /// The thrown value here is a pointer to the struct. We use this struct to
209 /// transfer two values by throwing a single value. Wasm throw and catch
210 /// instructions are capable of throwing and catching multiple values, but
211 /// it also requires multivalue support that is currently not very reliable.
212 /// TODO Switch to throwing and catching two values without using the struct
213 ///
214 /// All longjmpable function calls will be converted to an invoke that will
215 /// unwind to this catchpad in case a longjmp occurs. Within the catchpad, we
216 /// test the thrown values using __wasm_setjmp_test function as we do for
217 /// Emscripten SjLj. The main difference is, in Emscripten SjLj, we need to
218 /// transform every longjmpable callsite into a sequence of code including
219 /// __wasm_setjmp_test() call; in Wasm SjLj we do the testing in only one
220 /// place, in this catchpad.
221 ///
222 /// After calling __wasm_setjmp_test(), if the longjmp does not
223 /// correspond to one of the setjmps within the current function, it rethrows
224 /// the longjmp by calling __wasm_longjmp(). If it corresponds to one of
225 /// setjmps in the function, we jump to the beginning of the function, which
226 /// contains a switch to each post-setjmp BB. Again, in Emscripten SjLj, this
227 /// switch is added for every longjmpable callsite; in Wasm SjLj we do this
228 /// only once at the top of the function. (after functionInvocationId
229 /// initialization)
230 ///
231 /// The below is the pseudocode for what we have described
232 ///
233 /// entry:
234 ///   Initialize functionInvocationId
235 ///
236 /// setjmp.dispatch:
237 ///    switch %label {
238 ///      label 1: goto post-setjmp BB 1
239 ///      label 2: goto post-setjmp BB 2
240 ///      ...
241 ///      default: goto splitted next BB
242 ///    }
243 /// ...
244 ///
245 /// bb:
246 ///   invoke void @foo() ;; foo is a longjmpable function
247 ///     to label %next unwind label %catch.dispatch.longjmp
248 /// ...
249 ///
250 /// catch.dispatch.longjmp:
251 ///   %0 = catchswitch within none [label %catch.longjmp] unwind to caller
252 ///
253 /// catch.longjmp:
254 ///   %longjmp.args = wasm.catch() ;; struct __WasmLongjmpArgs
255 ///   %env = load 'env' field from __WasmLongjmpArgs
256 ///   %val = load 'val' field from __WasmLongjmpArgs
257 ///   %label = __wasm_setjmp_test(%env, functionInvocationId);
258 ///   if (%label == 0)
259 ///     __wasm_longjmp(%env, %val)
260 ///   catchret to %setjmp.dispatch
261 ///
262 ///===----------------------------------------------------------------------===//
263 
264 #include "WebAssembly.h"
265 #include "WebAssemblyTargetMachine.h"
266 #include "llvm/ADT/StringExtras.h"
267 #include "llvm/CodeGen/TargetPassConfig.h"
268 #include "llvm/CodeGen/WasmEHFuncInfo.h"
269 #include "llvm/IR/DebugInfoMetadata.h"
270 #include "llvm/IR/Dominators.h"
271 #include "llvm/IR/IRBuilder.h"
272 #include "llvm/IR/IntrinsicsWebAssembly.h"
273 #include "llvm/IR/Module.h"
274 #include "llvm/Support/CommandLine.h"
275 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
276 #include "llvm/Transforms/Utils/Local.h"
277 #include "llvm/Transforms/Utils/SSAUpdater.h"
278 #include "llvm/Transforms/Utils/SSAUpdaterBulk.h"
279 #include <set>
280 
281 using namespace llvm;
282 
283 #define DEBUG_TYPE "wasm-lower-em-ehsjlj"
284 
// Comma-separated command-line list of function names. The pass constructor
// copies these names into EHAllowlistSet; when the list is empty, every
// function is treated as allowed (see areAllExceptionsAllowed()).
285 static cl::list<std::string>
286     EHAllowlist("emscripten-cxx-exceptions-allowed",
287                 cl::desc("The list of function names in which Emscripten-style "
288                          "exception handling is enabled (see emscripten "
289                          "EMSCRIPTEN_CATCHING_ALLOWED options)"),
290                 cl::CommaSeparated);
291 
292 namespace {
/// Module pass that lowers exception-handling constructs and setjmp/longjmp
/// calls as described in the comment at the top of this file. Which lowering
/// modes are active is decided once, in the constructor, from the
/// WebAssembly::WasmEnable* flags.
293 class WebAssemblyLowerEmscriptenEHSjLj final : public ModulePass {
294   bool EnableEmEH;     // Enable Emscripten exception handling
295   bool EnableEmSjLj;   // Enable Emscripten setjmp/longjmp handling
296   bool EnableWasmSjLj; // Enable Wasm setjmp/longjmp handling
  // NOTE(review): DoSjLj is not initialized by the constructor; presumably it is
  // assigned in runOnModule() before it is read -- confirm.
297   bool DoSjLj;         // Whether we actually perform setjmp/longjmp handling
298
299   GlobalVariable *ThrewGV = nullptr;      // __THREW__ (Emscripten)
300   GlobalVariable *ThrewValueGV = nullptr; // __threwValue (Emscripten)
301   Function *GetTempRet0F = nullptr;       // getTempRet0() (Emscripten)
302   Function *SetTempRet0F = nullptr;       // setTempRet0() (Emscripten)
303   Function *ResumeF = nullptr;            // __resumeException() (Emscripten)
304   Function *EHTypeIDF = nullptr;          // llvm.eh.typeid.for() (intrinsic)
305   Function *EmLongjmpF = nullptr;         // emscripten_longjmp() (Emscripten)
306   Function *WasmSetjmpF = nullptr;        // __wasm_setjmp() (Emscripten)
307   Function *WasmSetjmpTestF = nullptr;    // __wasm_setjmp_test() (Emscripten)
308   Function *WasmLongjmpF = nullptr;       // __wasm_longjmp() (Emscripten)
309   Function *CatchF = nullptr;             // wasm.catch() (intrinsic)
310
311   // Type of 'struct __WasmLongjmpArgs' defined in Emscripten
312   Type *LongjmpArgsTy = nullptr;
313
314   // Cache of __cxa_find_matching_catch_N functions.
315   // Indexed by the number of clauses in an original landingpad instruction.
316   DenseMap<int, Function *> FindMatchingCatches;
317   // Cache mapping a function signature string to its __invoke_ wrapper
318   StringMap<Function *> InvokeWrappers;
319   // Set of allowed function names for exception handling; empty means "all"
320   std::set<std::string, std::less<>> EHAllowlistSet;
321   // Functions that contain calls to setjmp
322   SmallPtrSet<Function *, 8> SetjmpUsers;
323
324   StringRef getPassName() const override {
325     return "WebAssembly Lower Emscripten Exceptions";
326   }
327
328   using InstVector = SmallVectorImpl<Instruction *>;
329   bool runEHOnFunction(Function &F);
330   bool runSjLjOnFunction(Function &F);
331   void handleLongjmpableCallsForEmscriptenSjLj(
332       Function &F, Instruction *FunctionInvocationId,
333       SmallVectorImpl<PHINode *> &SetjmpRetPHIs);
334   void
335   handleLongjmpableCallsForWasmSjLj(Function &F,
336                                     Instruction *FunctionInvocationId,
337                                     SmallVectorImpl<PHINode *> &SetjmpRetPHIs);
338   Function *getFindMatchingCatch(Module &M, unsigned NumClauses);
339
340   Value *wrapInvoke(CallBase *CI);
341   void wrapTestSetjmp(BasicBlock *BB, DebugLoc DL, Value *Threw,
342                       Value *FunctionInvocationId, Value *&Label,
343                       Value *&LongjmpResult, BasicBlock *&CallEmLongjmpBB,
344                       PHINode *&CallEmLongjmpBBThrewPHI,
345                       PHINode *&CallEmLongjmpBBThrewValuePHI,
346                       BasicBlock *&EndBB);
347   Function *getInvokeWrapper(CallBase *CI);
348
  // An empty allowlist means no restriction was requested, so every function
  // is allowed.
349   bool areAllExceptionsAllowed() const { return EHAllowlistSet.empty(); }
  // Emscripten EH applies to F only when the mode is enabled and F is either
  // covered by the "allow all" default or named in the allowlist.
350   bool supportsException(const Function *F) const {
351     return EnableEmEH &&
352            (areAllExceptionsAllowed() || EHAllowlistSet.count(F->getName()));
353   }
354   void replaceLongjmpWith(Function *LongjmpF, Function *NewF);
355
356   void rebuildSSA(Function &F);
357
358 public:
359   static char ID;
360
  // Snapshots the global mode flags and the command-line allowlist. The asserts
  // reject unsupported mode combinations: both SjLj modes at once, and
  // Emscripten EH together with Wasm SjLj.
361   WebAssemblyLowerEmscriptenEHSjLj()
362       : ModulePass(ID), EnableEmEH(WebAssembly::WasmEnableEmEH),
363         EnableEmSjLj(WebAssembly::WasmEnableEmSjLj),
364         EnableWasmSjLj(WebAssembly::WasmEnableSjLj) {
365     assert(!(EnableEmSjLj && EnableWasmSjLj) &&
366            "Two SjLj modes cannot be turned on at the same time");
367     assert(!(EnableEmEH && EnableWasmSjLj) &&
368            "Wasm SjLj should be only used with Wasm EH");
369     EHAllowlistSet.insert(EHAllowlist.begin(), EHAllowlist.end());
370   }
371   bool runOnModule(Module &M) override;
372
  // The pass requires the dominator tree analysis.
373   void getAnalysisUsage(AnalysisUsage &AU) const override {
374     AU.addRequired<DominatorTreeWrapperPass>();
375   }
376 };
377 } // End anonymous namespace
378 
// Definition of the pass ID that the constructor hands to ModulePass, followed
// by the pass registration under the DEBUG_TYPE argument
// ("wasm-lower-em-ehsjlj"). The two trailing 'false' arguments are
// INITIALIZE_PASS's CFG-only and is-analysis flags.
379 char WebAssemblyLowerEmscriptenEHSjLj::ID = 0;
380 INITIALIZE_PASS(WebAssemblyLowerEmscriptenEHSjLj, DEBUG_TYPE,
381                 "WebAssembly Lower Emscripten Exceptions / Setjmp / Longjmp",
382                 false, false)
383 
384 ModulePass *llvm::createWebAssemblyLowerEmscriptenEHSjLj() {
385   return new WebAssemblyLowerEmscriptenEHSjLj();
386 }
387 
388 static bool canThrow(const Value *V) {
389   if (const auto *F = dyn_cast<const Function>(V)) {
390     // Intrinsics cannot throw
391     if (F->isIntrinsic())
392       return false;
393     StringRef Name = F->getName();
394     // leave setjmp and longjmp (mostly) alone, we process them properly later
395     if (Name == "setjmp" || Name == "longjmp" || Name == "emscripten_longjmp")
396       return false;
397     return !F->doesNotThrow();
398   }
399   // not a function, so an indirect call - can throw, we can't tell
400   return true;
401 }
402 
403 // Get a thread-local global variable with the given name. If it doesn't exist
404 // declare it, which will generate an import and assume that it will exist at
405 // link time.
406 static GlobalVariable *getGlobalVariable(Module &M, Type *Ty,
407                                          WebAssemblyTargetMachine &TM,
408                                          const char *Name) {
409   auto *GV = dyn_cast<GlobalVariable>(M.getOrInsertGlobal(Name, Ty));
410   if (!GV)
411     report_fatal_error(Twine("unable to create global: ") + Name);
412 
413   // Variables created by this function are thread local. If the target does not
414   // support TLS, we depend on CoalesceFeaturesAndStripAtomics to downgrade it
415   // to non-thread-local ones, in which case we don't allow this object to be
416   // linked with other objects using shared memory.
417   GV->setThreadLocalMode(GlobalValue::GeneralDynamicTLSModel);
418   return GV;
419 }
420 
421 // Simple function name mangler.
422 // This function simply takes LLVM's string representation of parameter types
423 // and concatenate them with '_'. There are non-alphanumeric characters but llc
424 // is ok with it, and we need to postprocess these names after the lowering
425 // phase anyway.
426 static std::string getSignature(FunctionType *FTy) {
427   std::string Sig;
428   raw_string_ostream OS(Sig);
429   OS << *FTy->getReturnType();
430   for (Type *ParamTy : FTy->params())
431     OS << "_" << *ParamTy;
432   if (FTy->isVarArg())
433     OS << "_...";
434   Sig = OS.str();
435   erase_if(Sig, isSpace);
436   // When s2wasm parses .s file, a comma means the end of an argument. So a
437   // mangled function name can contain any character but a comma.
438   llvm::replace(Sig, ',', '.');
439   return Sig;
440 }
441 
442 static Function *getFunction(FunctionType *Ty, const Twine &Name, Module *M) {
443   return Function::Create(Ty, GlobalValue::ExternalLinkage, Name, M);
444 }
445 
446 static void markAsImported(Function *F) {
447   // Tell the linker that this function is expected to be imported from the
448   // 'env' module. This is necessary for functions that do not have fixed names
449   // (e.g. __import_xyz).  These names cannot be provided by any kind of shared
450   // or static library as instead we mark them explictly as imported.
451   if (!F->hasFnAttribute("wasm-import-module")) {
452     llvm::AttrBuilder B(F->getParent()->getContext());
453     B.addAttribute("wasm-import-module", "env");
454     F->addFnAttrs(B);
455   }
456   if (!F->hasFnAttribute("wasm-import-name")) {
457     llvm::AttrBuilder B(F->getParent()->getContext());
458     B.addAttribute("wasm-import-name", F->getName());
459     F->addFnAttrs(B);
460   }
461 }
462 
463 // Returns an integer type for the target architecture's address space.
464 // i32 for wasm32 and i64 for wasm64.
465 static Type *getAddrIntType(Module *M) {
466   IRBuilder<> IRB(M->getContext());
467   return IRB.getIntNTy(M->getDataLayout().getPointerSizeInBits());
468 }
469 
470 // Returns an integer pointer type for the target architecture's address space.
471 // i32* for wasm32 and i64* for wasm64. With opaque pointers this is just a ptr
472 // in address space zero.
473 static Type *getAddrPtrType(Module *M) {
474   return PointerType::getUnqual(M->getContext());
475 }
476 
477 // Returns an integer whose type is the integer type for the target's address
478 // space. Returns (i32 C) for wasm32 and (i64 C) for wasm64, when C is the
479 // integer.
480 static Value *getAddrSizeInt(Module *M, uint64_t C) {
481   IRBuilder<> IRB(M->getContext());
482   return IRB.getIntN(M->getDataLayout().getPointerSizeInBits(), C);
483 }
484 
485 // Returns __cxa_find_matching_catch_N function, where N = NumClauses + 2.
486 // This is because a landingpad instruction contains two more arguments, a
487 // personality function and a cleanup bit, and __cxa_find_matching_catch_N
488 // functions are named after the number of arguments in the original landingpad
489 // instruction.
490 Function *
491 WebAssemblyLowerEmscriptenEHSjLj::getFindMatchingCatch(Module &M,
492                                                        unsigned NumClauses) {
493   auto [It, Inserted] = FindMatchingCatches.try_emplace(NumClauses);
494   if (!Inserted)
495     return It->second;
496   PointerType *Int8PtrTy = PointerType::getUnqual(M.getContext());
497   SmallVector<Type *, 16> Args(NumClauses, Int8PtrTy);
498   FunctionType *FTy = FunctionType::get(Int8PtrTy, Args, false);
499   Function *F = getFunction(
500       FTy, "__cxa_find_matching_catch_" + Twine(NumClauses + 2), &M);
501   markAsImported(F);
502   It->second = F;
503   return F;
504 }
505 
506 // Generate invoke wrapper sequence with preamble and postamble
507 // Preamble:
508 // __THREW__ = 0;
509 // Postamble:
510 // %__THREW__.val = __THREW__; __THREW__ = 0;
511 // Returns %__THREW__.val, which indicates whether an exception is thrown (or
512 // whether longjmp occurred), for future use.
//
// Every new instruction is inserted immediately before CI. All uses of CI are
// redirected to the new wrapper call, but CI itself is not erased here.
513 Value *WebAssemblyLowerEmscriptenEHSjLj::wrapInvoke(CallBase *CI) {
514   Module *M = CI->getModule();
515   LLVMContext &C = M->getContext();
516
517   IRBuilder<> IRB(C);
518   IRB.SetInsertPoint(CI);
519
520   // Pre-invoke
521   // __THREW__ = 0;
522   IRB.CreateStore(getAddrSizeInt(M, 0), ThrewGV);
523
524   // Invoke function wrapper in JavaScript
525   SmallVector<Value *, 16> Args;
526   // Put the pointer to the callee as first argument, so it can be called
527   // within the invoke wrapper later
528   Args.push_back(CI->getCalledOperand());
529   Args.append(CI->arg_begin(), CI->arg_end());
530   CallInst *NewCall = IRB.CreateCall(getInvokeWrapper(CI), Args);
  // The wrapper call takes over the original's name and debug location.
531   NewCall->takeName(CI);
532   NewCall->setCallingConv(CallingConv::WASM_EmscriptenInvoke);
533   NewCall->setDebugLoc(CI->getDebugLoc());
534
535   // Because we added the pointer to the callee as first argument, all
536   // argument attribute indices have to be incremented by one.
537   SmallVector<AttributeSet, 8> ArgAttributes;
538   const AttributeList &InvokeAL = CI->getAttributes();
539
540   // No attributes for the callee pointer.
541   ArgAttributes.push_back(AttributeSet());
542   // Copy the argument attributes from the original
543   for (unsigned I = 0, E = CI->arg_size(); I < E; ++I)
544     ArgAttributes.push_back(InvokeAL.getParamAttrs(I));
545
546   AttrBuilder FnAttrs(CI->getContext(), InvokeAL.getFnAttrs());
  // Note: this 'Args' shadows the argument vector declared above.
547   if (auto Args = FnAttrs.getAllocSizeArgs()) {
548     // The allocsize attribute (if any) refers to parameters by index and needs
549     // to be adjusted.
550     auto [SizeArg, NEltArg] = *Args;
551     SizeArg += 1;
552     if (NEltArg)
553       NEltArg = *NEltArg + 1;
554     FnAttrs.addAllocSizeAttr(SizeArg, NEltArg);
555   }
556   // In case the callee has 'noreturn' attribute, we need to remove it, because
557   // we expect invoke wrappers to return.
558   FnAttrs.removeAttribute(Attribute::NoReturn);
559
560   // Reconstruct the AttributesList based on the vector we constructed.
561   AttributeList NewCallAL = AttributeList::get(
562       C, AttributeSet::get(C, FnAttrs), InvokeAL.getRetAttrs(), ArgAttributes);
563   NewCall->setAttributes(NewCallAL);
564
565   CI->replaceAllUsesWith(NewCall);
566
567   // Post-invoke
568   // %__THREW__.val = __THREW__; __THREW__ = 0;
569   Value *Threw =
570       IRB.CreateLoad(getAddrIntType(M), ThrewGV, ThrewGV->getName() + ".val");
571   IRB.CreateStore(getAddrSizeInt(M, 0), ThrewGV);
572   return Threw;
573 }
574 
575 // Get matching invoke wrapper based on callee signature
576 Function *WebAssemblyLowerEmscriptenEHSjLj::getInvokeWrapper(CallBase *CI) {
577   Module *M = CI->getModule();
578   SmallVector<Type *, 16> ArgTys;
579   FunctionType *CalleeFTy = CI->getFunctionType();
580 
581   std::string Sig = getSignature(CalleeFTy);
582   auto It = InvokeWrappers.find(Sig);
583   if (It != InvokeWrappers.end())
584     return It->second;
585 
586   // Put the pointer to the callee as first argument
587   ArgTys.push_back(PointerType::getUnqual(CI->getContext()));
588   // Add argument types
589   ArgTys.append(CalleeFTy->param_begin(), CalleeFTy->param_end());
590 
591   FunctionType *FTy = FunctionType::get(CalleeFTy->getReturnType(), ArgTys,
592                                         CalleeFTy->isVarArg());
593   Function *F = getFunction(FTy, "__invoke_" + Sig, M);
594   markAsImported(F);
595   InvokeWrappers[Sig] = F;
596   return F;
597 }
598 
599 static bool canLongjmp(const Value *Callee) {
600   if (auto *CalleeF = dyn_cast<Function>(Callee))
601     if (CalleeF->isIntrinsic())
602       return false;
603 
604   // Attempting to transform inline assembly will result in something like:
605   //     call void @__invoke_void(void ()* asm ...)
606   // which is invalid because inline assembly blocks do not have addresses
607   // and can't be passed by pointer. The result is a crash with illegal IR.
608   if (isa<InlineAsm>(Callee))
609     return false;
610   StringRef CalleeName = Callee->getName();
611 
612   // TODO Include more functions or consider checking with mangled prefixes
613 
614   // The reason we include malloc/free here is to exclude the malloc/free
615   // calls generated in setjmp prep / cleanup routines.
616   if (CalleeName == "setjmp" || CalleeName == "malloc" || CalleeName == "free")
617     return false;
618 
619   // There are functions in Emscripten's JS glue code or compiler-rt
620   if (CalleeName == "__resumeException" || CalleeName == "llvm_eh_typeid_for" ||
621       CalleeName == "__wasm_setjmp" || CalleeName == "__wasm_setjmp_test" ||
622       CalleeName == "getTempRet0" || CalleeName == "setTempRet0")
623     return false;
624 
625   // __cxa_find_matching_catch_N functions cannot longjmp
626   if (Callee->getName().starts_with("__cxa_find_matching_catch_"))
627     return false;
628 
629   // Exception-catching related functions
630   //
631   // We intentionally treat __cxa_end_catch longjmpable in Wasm SjLj even though
632   // it surely cannot longjmp, in order to maintain the unwind relationship from
633   // all existing catchpads (and calls within them) to catch.dispatch.longjmp.
634   //
635   // In Wasm EH + Wasm SjLj, we
636   // 1. Make all catchswitch and cleanuppad that unwind to caller unwind to
637   //    catch.dispatch.longjmp instead
638   // 2. Convert all longjmpable calls to invokes that unwind to
639   //    catch.dispatch.longjmp
640   // But catchswitch BBs are removed in isel, so if an EH catchswitch (generated
641   // from an exception)'s catchpad does not contain any calls that are converted
642   // into invokes unwinding to catch.dispatch.longjmp, this unwind relationship
643   // (EH catchswitch BB -> catch.dispatch.longjmp BB) is lost and
644   // catch.dispatch.longjmp BB can be placed before the EH catchswitch BB in
645   // CFGSort.
646   // int ret = setjmp(buf);
647   // try {
648   //   foo(); // longjmps
649   // } catch (...) {
650   // }
651   // Then in this code, if 'foo' longjmps, it first unwinds to 'catch (...)'
652   // catchswitch, and is not caught by that catchswitch because it is a longjmp,
653   // then it should next unwind to catch.dispatch.longjmp BB. But if this 'catch
654   // (...)' catchswitch -> catch.dispatch.longjmp unwind relationship is lost,
655   // it will not unwind to catch.dispatch.longjmp, producing an incorrect
656   // result.
657   //
658   // Every catchpad generated by Wasm C++ contains __cxa_end_catch, so we
659   // intentionally treat it as longjmpable to work around this problem. This is
660   // a hacky fix but an easy one.
661   if (CalleeName == "__cxa_end_catch")
662     return WebAssembly::WasmEnableSjLj;
663   if (CalleeName == "__cxa_begin_catch" ||
664       CalleeName == "__cxa_allocate_exception" || CalleeName == "__cxa_throw" ||
665       CalleeName == "__clang_call_terminate")
666     return false;
667 
668   // std::terminate, which is generated when another exception occurs while
669   // handling an exception, cannot longjmp.
670   if (CalleeName == "_ZSt9terminatev")
671     return false;
672 
673   // Otherwise we don't know
674   return true;
675 }
676 
677 static bool isEmAsmCall(const Value *Callee) {
678   StringRef CalleeName = Callee->getName();
679   // This is an exhaustive list from Emscripten's <emscripten/em_asm.h>.
680   return CalleeName == "emscripten_asm_const_int" ||
681          CalleeName == "emscripten_asm_const_double" ||
682          CalleeName == "emscripten_asm_const_int_sync_on_main_thread" ||
683          CalleeName == "emscripten_asm_const_double_sync_on_main_thread" ||
684          CalleeName == "emscripten_asm_const_async_on_main_thread";
685 }
686 
687 // Generate __wasm_setjmp_test function call seqence with preamble and
688 // postamble. The code this generates is equivalent to the following
689 // JavaScript code:
690 // %__threwValue.val = __threwValue;
691 // if (%__THREW__.val != 0 & %__threwValue.val != 0) {
692 //   %label = __wasm_setjmp_test(%__THREW__.val, functionInvocationId);
693 //   if (%label == 0)
694 //     emscripten_longjmp(%__THREW__.val, %__threwValue.val);
695 //   setTempRet0(%__threwValue.val);
696 // } else {
697 //   %label = -1;
698 // }
699 // %longjmp_result = getTempRet0();
700 //
701 // As output parameters. returns %label, %longjmp_result, and the BB the last
702 // instruction (%longjmp_result = ...) is in.
703 void WebAssemblyLowerEmscriptenEHSjLj::wrapTestSetjmp(
704     BasicBlock *BB, DebugLoc DL, Value *Threw, Value *FunctionInvocationId,
705     Value *&Label, Value *&LongjmpResult, BasicBlock *&CallEmLongjmpBB,
706     PHINode *&CallEmLongjmpBBThrewPHI, PHINode *&CallEmLongjmpBBThrewValuePHI,
707     BasicBlock *&EndBB) {
708   Function *F = BB->getParent();
709   Module *M = F->getParent();
710   LLVMContext &C = M->getContext();
711   IRBuilder<> IRB(C);
712   IRB.SetCurrentDebugLocation(DL);
713 
714   // if (%__THREW__.val != 0 & %__threwValue.val != 0)
715   IRB.SetInsertPoint(BB);
716   BasicBlock *ThenBB1 = BasicBlock::Create(C, "if.then1", F);
717   BasicBlock *ElseBB1 = BasicBlock::Create(C, "if.else1", F);
718   BasicBlock *EndBB1 = BasicBlock::Create(C, "if.end", F);
719   Value *ThrewCmp = IRB.CreateICmpNE(Threw, getAddrSizeInt(M, 0));
720   Value *ThrewValue = IRB.CreateLoad(IRB.getInt32Ty(), ThrewValueGV,
721                                      ThrewValueGV->getName() + ".val");
722   Value *ThrewValueCmp = IRB.CreateICmpNE(ThrewValue, IRB.getInt32(0));
723   Value *Cmp1 = IRB.CreateAnd(ThrewCmp, ThrewValueCmp, "cmp1");
724   IRB.CreateCondBr(Cmp1, ThenBB1, ElseBB1);
725 
726   // Generate call.em.longjmp BB once and share it within the function
727   if (!CallEmLongjmpBB) {
728     // emscripten_longjmp(%__THREW__.val, %__threwValue.val);
729     CallEmLongjmpBB = BasicBlock::Create(C, "call.em.longjmp", F);
730     IRB.SetInsertPoint(CallEmLongjmpBB);
731     CallEmLongjmpBBThrewPHI = IRB.CreatePHI(getAddrIntType(M), 4, "threw.phi");
732     CallEmLongjmpBBThrewValuePHI =
733         IRB.CreatePHI(IRB.getInt32Ty(), 4, "threwvalue.phi");
734     CallEmLongjmpBBThrewPHI->addIncoming(Threw, ThenBB1);
735     CallEmLongjmpBBThrewValuePHI->addIncoming(ThrewValue, ThenBB1);
736     IRB.CreateCall(EmLongjmpF,
737                    {CallEmLongjmpBBThrewPHI, CallEmLongjmpBBThrewValuePHI});
738     IRB.CreateUnreachable();
739   } else {
740     CallEmLongjmpBBThrewPHI->addIncoming(Threw, ThenBB1);
741     CallEmLongjmpBBThrewValuePHI->addIncoming(ThrewValue, ThenBB1);
742   }
743 
744   // %label = __wasm_setjmp_test(%__THREW__.val, functionInvocationId);
745   // if (%label == 0)
746   IRB.SetInsertPoint(ThenBB1);
747   BasicBlock *EndBB2 = BasicBlock::Create(C, "if.end2", F);
748   Value *ThrewPtr =
749       IRB.CreateIntToPtr(Threw, getAddrPtrType(M), Threw->getName() + ".p");
750   Value *ThenLabel = IRB.CreateCall(WasmSetjmpTestF,
751                                     {ThrewPtr, FunctionInvocationId}, "label");
752   Value *Cmp2 = IRB.CreateICmpEQ(ThenLabel, IRB.getInt32(0));
753   IRB.CreateCondBr(Cmp2, CallEmLongjmpBB, EndBB2);
754 
755   // setTempRet0(%__threwValue.val);
756   IRB.SetInsertPoint(EndBB2);
757   IRB.CreateCall(SetTempRet0F, ThrewValue);
758   IRB.CreateBr(EndBB1);
759 
760   IRB.SetInsertPoint(ElseBB1);
761   IRB.CreateBr(EndBB1);
762 
763   // longjmp_result = getTempRet0();
764   IRB.SetInsertPoint(EndBB1);
765   PHINode *LabelPHI = IRB.CreatePHI(IRB.getInt32Ty(), 2, "label");
766   LabelPHI->addIncoming(ThenLabel, EndBB2);
767 
768   LabelPHI->addIncoming(IRB.getInt32(-1), ElseBB1);
769 
770   // Output parameter assignment
771   Label = LabelPHI;
772   EndBB = EndBB1;
773   LongjmpResult = IRB.CreateCall(GetTempRet0F, {}, "longjmp_result");
774 }
775 
776 void WebAssemblyLowerEmscriptenEHSjLj::rebuildSSA(Function &F) {
777   DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>(F).getDomTree();
778   DT.recalculate(F); // CFG has been changed
779 
780   SSAUpdaterBulk SSA;
781   for (BasicBlock &BB : F) {
782     for (Instruction &I : BB) {
783       if (I.getType()->isVoidTy())
784         continue;
785       unsigned VarID = SSA.AddVariable(I.getName(), I.getType());
786       // If a value is defined by an invoke instruction, it is only available in
787       // its normal destination and not in its unwind destination.
788       if (auto *II = dyn_cast<InvokeInst>(&I))
789         SSA.AddAvailableValue(VarID, II->getNormalDest(), II);
790       else
791         SSA.AddAvailableValue(VarID, &BB, &I);
792       for (auto &U : I.uses()) {
793         auto *User = cast<Instruction>(U.getUser());
794         if (auto *UserPN = dyn_cast<PHINode>(User))
795           if (UserPN->getIncomingBlock(U) == &BB)
796             continue;
797         if (DT.dominates(&I, User))
798           continue;
799         SSA.AddUse(VarID, &U);
800       }
801     }
802   }
803   SSA.RewriteAllUses(&DT);
804 }
805 
806 // Replace uses of longjmp with a new longjmp function in Emscripten library.
807 // In Emscripten SjLj, the new function is
808 //   void emscripten_longjmp(uintptr_t, i32)
809 // In Wasm SjLj, the new function is
810 //   void __wasm_longjmp(i8*, i32)
811 // Because the original libc longjmp function takes (jmp_buf*, i32), we need a
812 // ptrtoint/bitcast instruction here to make the type match. jmp_buf* will
813 // eventually be lowered to i32/i64 in the wasm backend.
814 void WebAssemblyLowerEmscriptenEHSjLj::replaceLongjmpWith(Function *LongjmpF,
815                                                           Function *NewF) {
816   assert(NewF == EmLongjmpF || NewF == WasmLongjmpF);
817   Module *M = LongjmpF->getParent();
818   SmallVector<CallInst *, 8> ToErase;
819   LLVMContext &C = LongjmpF->getParent()->getContext();
820   IRBuilder<> IRB(C);
821 
822   // For calls to longjmp, replace it with emscripten_longjmp/__wasm_longjmp and
823   // cast its first argument (jmp_buf*) appropriately
824   for (User *U : LongjmpF->users()) {
825     auto *CI = dyn_cast<CallInst>(U);
826     if (CI && CI->getCalledFunction() == LongjmpF) {
827       IRB.SetInsertPoint(CI);
828       Value *Env = nullptr;
829       if (NewF == EmLongjmpF)
830         Env =
831             IRB.CreatePtrToInt(CI->getArgOperand(0), getAddrIntType(M), "env");
832       else // WasmLongjmpF
833         Env = IRB.CreateBitCast(CI->getArgOperand(0), IRB.getPtrTy(), "env");
834       IRB.CreateCall(NewF, {Env, CI->getArgOperand(1)});
835       ToErase.push_back(CI);
836     }
837   }
838   for (auto *I : ToErase)
839     I->eraseFromParent();
840 
841   // If we have any remaining uses of longjmp's function pointer, replace it
842   // with (void(*)(jmp_buf*, int))emscripten_longjmp / __wasm_longjmp.
843   if (!LongjmpF->uses().empty()) {
844     Value *NewLongjmp =
845         IRB.CreateBitCast(NewF, LongjmpF->getType(), "longjmp.cast");
846     LongjmpF->replaceAllUsesWith(NewLongjmp);
847   }
848 }
849 
850 static bool containsLongjmpableCalls(const Function *F) {
851   for (const auto &BB : *F)
852     for (const auto &I : BB)
853       if (const auto *CB = dyn_cast<CallBase>(&I))
854         if (canLongjmp(CB->getCalledOperand()))
855           return true;
856   return false;
857 }
858 
859 // When a function contains a setjmp call but not other calls that can longjmp,
860 // we don't do setjmp transformation for that setjmp. But we need to convert the
861 // setjmp calls into "i32 0" so they don't cause link time errors. setjmp always
862 // returns 0 when called directly.
863 static void nullifySetjmp(Function *F) {
864   Module &M = *F->getParent();
865   IRBuilder<> IRB(M.getContext());
866   Function *SetjmpF = M.getFunction("setjmp");
867   SmallVector<Instruction *, 1> ToErase;
868 
869   for (User *U : make_early_inc_range(SetjmpF->users())) {
870     auto *CB = cast<CallBase>(U);
871     BasicBlock *BB = CB->getParent();
872     if (BB->getParent() != F) // in other function
873       continue;
874     CallInst *CI = nullptr;
875     // setjmp cannot throw. So if it is an invoke, lower it to a call
876     if (auto *II = dyn_cast<InvokeInst>(CB))
877       CI = llvm::changeToCall(II);
878     else
879       CI = cast<CallInst>(CB);
880     ToErase.push_back(CI);
881     CI->replaceAllUsesWith(IRB.getInt32(0));
882   }
883   for (auto *I : ToErase)
884     I->eraseFromParent();
885 }
886 
887 bool WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module &M) {
888   LLVM_DEBUG(dbgs() << "********** Lower Emscripten EH & SjLj **********\n");
889 
890   LLVMContext &C = M.getContext();
891   IRBuilder<> IRB(C);
892 
893   Function *SetjmpF = M.getFunction("setjmp");
894   Function *LongjmpF = M.getFunction("longjmp");
895 
896   // In some platforms _setjmp and _longjmp are used instead. Change these to
897   // use setjmp/longjmp instead, because we later detect these functions by
898   // their names.
899   Function *SetjmpF2 = M.getFunction("_setjmp");
900   Function *LongjmpF2 = M.getFunction("_longjmp");
901   if (SetjmpF2) {
902     if (SetjmpF) {
903       if (SetjmpF->getFunctionType() != SetjmpF2->getFunctionType())
904         report_fatal_error("setjmp and _setjmp have different function types");
905     } else {
906       SetjmpF = Function::Create(SetjmpF2->getFunctionType(),
907                                  GlobalValue::ExternalLinkage, "setjmp", M);
908     }
909     SetjmpF2->replaceAllUsesWith(SetjmpF);
910   }
911   if (LongjmpF2) {
912     if (LongjmpF) {
913       if (LongjmpF->getFunctionType() != LongjmpF2->getFunctionType())
914         report_fatal_error(
915             "longjmp and _longjmp have different function types");
916     } else {
917       LongjmpF = Function::Create(LongjmpF2->getFunctionType(),
918                                   GlobalValue::ExternalLinkage, "setjmp", M);
919     }
920     LongjmpF2->replaceAllUsesWith(LongjmpF);
921   }
922 
923   auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
924   assert(TPC && "Expected a TargetPassConfig");
925   auto &TM = TPC->getTM<WebAssemblyTargetMachine>();
926 
927   // Declare (or get) global variables __THREW__, __threwValue, and
928   // getTempRet0/setTempRet0 function which are used in common for both
929   // exception handling and setjmp/longjmp handling
930   ThrewGV = getGlobalVariable(M, getAddrIntType(&M), TM, "__THREW__");
931   ThrewValueGV = getGlobalVariable(M, IRB.getInt32Ty(), TM, "__threwValue");
932   GetTempRet0F = getFunction(FunctionType::get(IRB.getInt32Ty(), false),
933                              "getTempRet0", &M);
934   SetTempRet0F =
935       getFunction(FunctionType::get(IRB.getVoidTy(), IRB.getInt32Ty(), false),
936                   "setTempRet0", &M);
937   GetTempRet0F->setDoesNotThrow();
938   SetTempRet0F->setDoesNotThrow();
939 
940   bool Changed = false;
941 
942   // Function registration for exception handling
943   if (EnableEmEH) {
944     // Register __resumeException function
945     FunctionType *ResumeFTy =
946         FunctionType::get(IRB.getVoidTy(), IRB.getPtrTy(), false);
947     ResumeF = getFunction(ResumeFTy, "__resumeException", &M);
948     ResumeF->addFnAttr(Attribute::NoReturn);
949 
950     // Register llvm_eh_typeid_for function
951     FunctionType *EHTypeIDTy =
952         FunctionType::get(IRB.getInt32Ty(), IRB.getPtrTy(), false);
953     EHTypeIDF = getFunction(EHTypeIDTy, "llvm_eh_typeid_for", &M);
954   }
955 
956   // Functions that contains calls to setjmp but don't have other longjmpable
957   // calls within them.
958   SmallPtrSet<Function *, 4> SetjmpUsersToNullify;
959 
960   if ((EnableEmSjLj || EnableWasmSjLj) && SetjmpF) {
961     // Precompute setjmp users
962     for (User *U : SetjmpF->users()) {
963       if (auto *CB = dyn_cast<CallBase>(U)) {
964         auto *UserF = CB->getFunction();
965         // If a function that calls setjmp does not contain any other calls that
966         // can longjmp, we don't need to do any transformation on that function,
967         // so can ignore it
968         if (containsLongjmpableCalls(UserF))
969           SetjmpUsers.insert(UserF);
970         else
971           SetjmpUsersToNullify.insert(UserF);
972       } else {
973         std::string S;
974         raw_string_ostream SS(S);
975         SS << *U;
976         report_fatal_error(Twine("Indirect use of setjmp is not supported: ") +
977                            SS.str());
978       }
979     }
980   }
981 
982   bool SetjmpUsed = SetjmpF && !SetjmpUsers.empty();
983   bool LongjmpUsed = LongjmpF && !LongjmpF->use_empty();
984   DoSjLj = (EnableEmSjLj | EnableWasmSjLj) && (SetjmpUsed || LongjmpUsed);
985 
986   // Function registration and data pre-gathering for setjmp/longjmp handling
987   if (DoSjLj) {
988     assert(EnableEmSjLj || EnableWasmSjLj);
989     if (EnableEmSjLj) {
990       // Register emscripten_longjmp function
991       FunctionType *FTy = FunctionType::get(
992           IRB.getVoidTy(), {getAddrIntType(&M), IRB.getInt32Ty()}, false);
993       EmLongjmpF = getFunction(FTy, "emscripten_longjmp", &M);
994       EmLongjmpF->addFnAttr(Attribute::NoReturn);
995     } else { // EnableWasmSjLj
996       Type *Int8PtrTy = IRB.getPtrTy();
997       // Register __wasm_longjmp function, which calls __builtin_wasm_longjmp.
998       FunctionType *FTy = FunctionType::get(
999           IRB.getVoidTy(), {Int8PtrTy, IRB.getInt32Ty()}, false);
1000       WasmLongjmpF = getFunction(FTy, "__wasm_longjmp", &M);
1001       WasmLongjmpF->addFnAttr(Attribute::NoReturn);
1002     }
1003 
1004     if (SetjmpF) {
1005       Type *Int8PtrTy = IRB.getPtrTy();
1006       Type *Int32PtrTy = IRB.getPtrTy();
1007       Type *Int32Ty = IRB.getInt32Ty();
1008 
1009       // Register __wasm_setjmp function
1010       FunctionType *SetjmpFTy = SetjmpF->getFunctionType();
1011       FunctionType *FTy = FunctionType::get(
1012           IRB.getVoidTy(), {SetjmpFTy->getParamType(0), Int32Ty, Int32PtrTy},
1013           false);
1014       WasmSetjmpF = getFunction(FTy, "__wasm_setjmp", &M);
1015 
1016       // Register __wasm_setjmp_test function
1017       FTy = FunctionType::get(Int32Ty, {Int32PtrTy, Int32PtrTy}, false);
1018       WasmSetjmpTestF = getFunction(FTy, "__wasm_setjmp_test", &M);
1019 
1020       // wasm.catch() will be lowered down to wasm 'catch' instruction in
1021       // instruction selection.
1022       CatchF = Intrinsic::getOrInsertDeclaration(&M, Intrinsic::wasm_catch);
1023       // Type for struct __WasmLongjmpArgs
1024       LongjmpArgsTy = StructType::get(Int8PtrTy, // env
1025                                       Int32Ty    // val
1026       );
1027     }
1028   }
1029 
1030   // Exception handling transformation
1031   if (EnableEmEH) {
1032     for (Function &F : M) {
1033       if (F.isDeclaration())
1034         continue;
1035       Changed |= runEHOnFunction(F);
1036     }
1037   }
1038 
1039   // Setjmp/longjmp handling transformation
1040   if (DoSjLj) {
1041     Changed = true; // We have setjmp or longjmp somewhere
1042     if (LongjmpF)
1043       replaceLongjmpWith(LongjmpF, EnableEmSjLj ? EmLongjmpF : WasmLongjmpF);
1044     // Only traverse functions that uses setjmp in order not to insert
1045     // unnecessary prep / cleanup code in every function
1046     if (SetjmpF)
1047       for (Function *F : SetjmpUsers)
1048         runSjLjOnFunction(*F);
1049   }
1050 
1051   // Replace unnecessary setjmp calls with 0
1052   if ((EnableEmSjLj || EnableWasmSjLj) && !SetjmpUsersToNullify.empty()) {
1053     Changed = true;
1054     assert(SetjmpF);
1055     for (Function *F : SetjmpUsersToNullify)
1056       nullifySetjmp(F);
1057   }
1058 
1059   // Delete unused global variables and functions
1060   for (auto *V : {ThrewGV, ThrewValueGV})
1061     if (V && V->use_empty())
1062       V->eraseFromParent();
1063   for (auto *V : {GetTempRet0F, SetTempRet0F, ResumeF, EHTypeIDF, EmLongjmpF,
1064                   WasmSetjmpF, WasmSetjmpTestF, WasmLongjmpF, CatchF})
1065     if (V && V->use_empty())
1066       V->eraseFromParent();
1067 
1068   return Changed;
1069 }
1070 
1071 bool WebAssemblyLowerEmscriptenEHSjLj::runEHOnFunction(Function &F) {
1072   Module &M = *F.getParent();
1073   LLVMContext &C = F.getContext();
1074   IRBuilder<> IRB(C);
1075   bool Changed = false;
1076   SmallVector<Instruction *, 64> ToErase;
1077   SmallPtrSet<LandingPadInst *, 32> LandingPads;
1078 
1079   // rethrow.longjmp BB that will be shared within the function.
1080   BasicBlock *RethrowLongjmpBB = nullptr;
1081   // PHI node for the loaded value of __THREW__ global variable in
1082   // rethrow.longjmp BB
1083   PHINode *RethrowLongjmpBBThrewPHI = nullptr;
1084 
1085   for (BasicBlock &BB : F) {
1086     auto *II = dyn_cast<InvokeInst>(BB.getTerminator());
1087     if (!II)
1088       continue;
1089     Changed = true;
1090     LandingPads.insert(II->getLandingPadInst());
1091     IRB.SetInsertPoint(II);
1092 
1093     const Value *Callee = II->getCalledOperand();
1094     bool NeedInvoke = supportsException(&F) && canThrow(Callee);
1095     if (NeedInvoke) {
1096       // Wrap invoke with invoke wrapper and generate preamble/postamble
1097       Value *Threw = wrapInvoke(II);
1098       ToErase.push_back(II);
1099 
1100       // If setjmp/longjmp handling is enabled, the thrown value can be not an
1101       // exception but a longjmp. If the current function contains calls to
1102       // setjmp, it will be appropriately handled in runSjLjOnFunction. But even
1103       // if the function does not contain setjmp calls, we shouldn't silently
1104       // ignore longjmps; we should rethrow them so they can be correctly
1105       // handled in somewhere up the call chain where setjmp is. __THREW__'s
1106       // value is 0 when nothing happened, 1 when an exception is thrown, and
1107       // other values when longjmp is thrown.
1108       //
1109       // if (%__THREW__.val == 0 || %__THREW__.val == 1)
1110       //   goto %tail
1111       // else
1112       //   goto %longjmp.rethrow
1113       //
1114       // rethrow.longjmp: ;; This is longjmp. Rethrow it
1115       //   %__threwValue.val = __threwValue
1116       //   emscripten_longjmp(%__THREW__.val, %__threwValue.val);
1117       //
1118       // tail: ;; Nothing happened or an exception is thrown
1119       //   ... Continue exception handling ...
1120       if (DoSjLj && EnableEmSjLj && !SetjmpUsers.count(&F) &&
1121           canLongjmp(Callee)) {
1122         // Create longjmp.rethrow BB once and share it within the function
1123         if (!RethrowLongjmpBB) {
1124           RethrowLongjmpBB = BasicBlock::Create(C, "rethrow.longjmp", &F);
1125           IRB.SetInsertPoint(RethrowLongjmpBB);
1126           RethrowLongjmpBBThrewPHI =
1127               IRB.CreatePHI(getAddrIntType(&M), 4, "threw.phi");
1128           RethrowLongjmpBBThrewPHI->addIncoming(Threw, &BB);
1129           Value *ThrewValue = IRB.CreateLoad(IRB.getInt32Ty(), ThrewValueGV,
1130                                              ThrewValueGV->getName() + ".val");
1131           IRB.CreateCall(EmLongjmpF, {RethrowLongjmpBBThrewPHI, ThrewValue});
1132           IRB.CreateUnreachable();
1133         } else {
1134           RethrowLongjmpBBThrewPHI->addIncoming(Threw, &BB);
1135         }
1136 
1137         IRB.SetInsertPoint(II); // Restore the insert point back
1138         BasicBlock *Tail = BasicBlock::Create(C, "tail", &F);
1139         Value *CmpEqOne =
1140             IRB.CreateICmpEQ(Threw, getAddrSizeInt(&M, 1), "cmp.eq.one");
1141         Value *CmpEqZero =
1142             IRB.CreateICmpEQ(Threw, getAddrSizeInt(&M, 0), "cmp.eq.zero");
1143         Value *Or = IRB.CreateOr(CmpEqZero, CmpEqOne, "or");
1144         IRB.CreateCondBr(Or, Tail, RethrowLongjmpBB);
1145         IRB.SetInsertPoint(Tail);
1146         BB.replaceSuccessorsPhiUsesWith(&BB, Tail);
1147       }
1148 
1149       // Insert a branch based on __THREW__ variable
1150       Value *Cmp = IRB.CreateICmpEQ(Threw, getAddrSizeInt(&M, 1), "cmp");
1151       IRB.CreateCondBr(Cmp, II->getUnwindDest(), II->getNormalDest());
1152 
1153     } else {
1154       // This can't throw, and we don't need this invoke, just replace it with a
1155       // call+branch
1156       changeToCall(II);
1157     }
1158   }
1159 
1160   // Process resume instructions
1161   for (BasicBlock &BB : F) {
1162     // Scan the body of the basic block for resumes
1163     for (Instruction &I : BB) {
1164       auto *RI = dyn_cast<ResumeInst>(&I);
1165       if (!RI)
1166         continue;
1167       Changed = true;
1168 
1169       // Split the input into legal values
1170       Value *Input = RI->getValue();
1171       IRB.SetInsertPoint(RI);
1172       Value *Low = IRB.CreateExtractValue(Input, 0, "low");
1173       // Create a call to __resumeException function
1174       IRB.CreateCall(ResumeF, {Low});
1175       // Add a terminator to the block
1176       IRB.CreateUnreachable();
1177       ToErase.push_back(RI);
1178     }
1179   }
1180 
1181   // Process llvm.eh.typeid.for intrinsics
1182   for (BasicBlock &BB : F) {
1183     for (Instruction &I : BB) {
1184       auto *CI = dyn_cast<CallInst>(&I);
1185       if (!CI)
1186         continue;
1187       const Function *Callee = CI->getCalledFunction();
1188       if (!Callee)
1189         continue;
1190       if (Callee->getIntrinsicID() != Intrinsic::eh_typeid_for)
1191         continue;
1192       Changed = true;
1193 
1194       IRB.SetInsertPoint(CI);
1195       CallInst *NewCI =
1196           IRB.CreateCall(EHTypeIDF, CI->getArgOperand(0), "typeid");
1197       CI->replaceAllUsesWith(NewCI);
1198       ToErase.push_back(CI);
1199     }
1200   }
1201 
1202   // Look for orphan landingpads, can occur in blocks with no predecessors
1203   for (BasicBlock &BB : F) {
1204     BasicBlock::iterator I = BB.getFirstNonPHIIt();
1205     if (auto *LPI = dyn_cast<LandingPadInst>(I))
1206       LandingPads.insert(LPI);
1207   }
1208   Changed |= !LandingPads.empty();
1209 
1210   // Handle all the landingpad for this function together, as multiple invokes
1211   // may share a single lp
1212   for (LandingPadInst *LPI : LandingPads) {
1213     IRB.SetInsertPoint(LPI);
1214     SmallVector<Value *, 16> FMCArgs;
1215     for (unsigned I = 0, E = LPI->getNumClauses(); I < E; ++I) {
1216       Constant *Clause = LPI->getClause(I);
1217       // TODO Handle filters (= exception specifications).
1218       // https://github.com/llvm/llvm-project/issues/49740
1219       if (LPI->isCatch(I))
1220         FMCArgs.push_back(Clause);
1221     }
1222 
1223     // Create a call to __cxa_find_matching_catch_N function
1224     Function *FMCF = getFindMatchingCatch(M, FMCArgs.size());
1225     CallInst *FMCI = IRB.CreateCall(FMCF, FMCArgs, "fmc");
1226     Value *Poison = PoisonValue::get(LPI->getType());
1227     Value *Pair0 = IRB.CreateInsertValue(Poison, FMCI, 0, "pair0");
1228     Value *TempRet0 = IRB.CreateCall(GetTempRet0F, {}, "tempret0");
1229     Value *Pair1 = IRB.CreateInsertValue(Pair0, TempRet0, 1, "pair1");
1230 
1231     LPI->replaceAllUsesWith(Pair1);
1232     ToErase.push_back(LPI);
1233   }
1234 
1235   // Erase everything we no longer need in this function
1236   for (Instruction *I : ToErase)
1237     I->eraseFromParent();
1238 
1239   return Changed;
1240 }
1241 
1242 // This tries to get debug info from the instruction before which a new
1243 // instruction will be inserted, and if there's no debug info in that
1244 // instruction, tries to get the info instead from the previous instruction (if
1245 // any). If none of these has debug info and a DISubprogram is provided, it
1246 // creates a dummy debug info with the first line of the function, because IR
1247 // verifier requires all inlinable callsites should have debug info when both a
1248 // caller and callee have DISubprogram. If none of these conditions are met,
1249 // returns empty info.
1250 static DebugLoc getOrCreateDebugLoc(const Instruction *InsertBefore,
1251                                     DISubprogram *SP) {
1252   assert(InsertBefore);
1253   if (InsertBefore->getDebugLoc())
1254     return InsertBefore->getDebugLoc();
1255   const Instruction *Prev = InsertBefore->getPrevNode();
1256   if (Prev && Prev->getDebugLoc())
1257     return Prev->getDebugLoc();
1258   if (SP)
1259     return DILocation::get(SP->getContext(), SP->getLine(), 1, SP);
1260   return DebugLoc();
1261 }
1262 
1263 bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) {
1264   assert(EnableEmSjLj || EnableWasmSjLj);
1265   Module &M = *F.getParent();
1266   LLVMContext &C = F.getContext();
1267   IRBuilder<> IRB(C);
1268   SmallVector<Instruction *, 64> ToErase;
1269 
1270   // Setjmp preparation
1271 
1272   BasicBlock *Entry = &F.getEntryBlock();
1273   DebugLoc FirstDL = getOrCreateDebugLoc(&*Entry->begin(), F.getSubprogram());
1274   SplitBlock(Entry, &*Entry->getFirstInsertionPt());
1275 
1276   IRB.SetInsertPoint(Entry->getTerminator()->getIterator());
1277   // This alloca'ed pointer is used by the runtime to identify function
1278   // invocations. It's just for pointer comparisons. It will never be
1279   // dereferenced.
1280   Instruction *FunctionInvocationId =
1281       IRB.CreateAlloca(IRB.getInt32Ty(), nullptr, "functionInvocationId");
1282   FunctionInvocationId->setDebugLoc(FirstDL);
1283 
1284   // Setjmp transformation
1285   SmallVector<PHINode *, 4> SetjmpRetPHIs;
1286   Function *SetjmpF = M.getFunction("setjmp");
1287   for (auto *U : make_early_inc_range(SetjmpF->users())) {
1288     auto *CB = cast<CallBase>(U);
1289     BasicBlock *BB = CB->getParent();
1290     if (BB->getParent() != &F) // in other function
1291       continue;
1292     if (CB->getOperandBundle(LLVMContext::OB_funclet)) {
1293       std::string S;
1294       raw_string_ostream SS(S);
1295       SS << "In function " + F.getName() +
1296                 ": setjmp within a catch clause is not supported in Wasm EH:\n";
1297       SS << *CB;
1298       report_fatal_error(StringRef(SS.str()));
1299     }
1300 
1301     CallInst *CI = nullptr;
1302     // setjmp cannot throw. So if it is an invoke, lower it to a call
1303     if (auto *II = dyn_cast<InvokeInst>(CB))
1304       CI = llvm::changeToCall(II);
1305     else
1306       CI = cast<CallInst>(CB);
1307 
1308     // The tail is everything right after the call, and will be reached once
1309     // when setjmp is called, and later when longjmp returns to the setjmp
1310     BasicBlock *Tail = SplitBlock(BB, CI->getNextNode());
1311     // Add a phi to the tail, which will be the output of setjmp, which
1312     // indicates if this is the first call or a longjmp back. The phi directly
1313     // uses the right value based on where we arrive from
1314     IRB.SetInsertPoint(Tail, Tail->getFirstNonPHIIt());
1315     PHINode *SetjmpRet = IRB.CreatePHI(IRB.getInt32Ty(), 2, "setjmp.ret");
1316 
1317     // setjmp initial call returns 0
1318     SetjmpRet->addIncoming(IRB.getInt32(0), BB);
1319     // The proper output is now this, not the setjmp call itself
1320     CI->replaceAllUsesWith(SetjmpRet);
1321     // longjmp returns to the setjmp will add themselves to this phi
1322     SetjmpRetPHIs.push_back(SetjmpRet);
1323 
1324     // Fix call target
1325     // Our index in the function is our place in the array + 1 to avoid index
1326     // 0, because index 0 means the longjmp is not ours to handle.
1327     IRB.SetInsertPoint(CI);
1328     Value *Args[] = {CI->getArgOperand(0), IRB.getInt32(SetjmpRetPHIs.size()),
1329                      FunctionInvocationId};
1330     IRB.CreateCall(WasmSetjmpF, Args);
1331     ToErase.push_back(CI);
1332   }
1333 
1334   // Handle longjmpable calls.
1335   if (EnableEmSjLj)
1336     handleLongjmpableCallsForEmscriptenSjLj(F, FunctionInvocationId,
1337                                             SetjmpRetPHIs);
1338   else // EnableWasmSjLj
1339     handleLongjmpableCallsForWasmSjLj(F, FunctionInvocationId, SetjmpRetPHIs);
1340 
1341   // Erase everything we no longer need in this function
1342   for (Instruction *I : ToErase)
1343     I->eraseFromParent();
1344 
1345   // Finally, our modifications to the cfg can break dominance of SSA variables.
1346   // For example, in this code,
1347   // if (x()) { .. setjmp() .. }
1348   // if (y()) { .. longjmp() .. }
1349   // We must split the longjmp block, and it can jump into the block splitted
1350   // from setjmp one. But that means that when we split the setjmp block, it's
1351   // first part no longer dominates its second part - there is a theoretically
1352   // possible control flow path where x() is false, then y() is true and we
1353   // reach the second part of the setjmp block, without ever reaching the first
1354   // part. So, we rebuild SSA form here.
1355   rebuildSSA(F);
1356   return true;
1357 }
1358 
1359 // Update each call that can longjmp so it can return to the corresponding
1360 // setjmp. Refer to 4) of "Emscripten setjmp/longjmp handling" section in the
1361 // comments at top of the file for details.
1362 void WebAssemblyLowerEmscriptenEHSjLj::handleLongjmpableCallsForEmscriptenSjLj(
1363     Function &F, Instruction *FunctionInvocationId,
1364     SmallVectorImpl<PHINode *> &SetjmpRetPHIs) {
1365   Module &M = *F.getParent();
1366   LLVMContext &C = F.getContext();
1367   IRBuilder<> IRB(C);
1368   SmallVector<Instruction *, 64> ToErase;
1369 
1370   // call.em.longjmp BB that will be shared within the function.
1371   BasicBlock *CallEmLongjmpBB = nullptr;
1372   // PHI node for the loaded value of __THREW__ global variable in
1373   // call.em.longjmp BB
1374   PHINode *CallEmLongjmpBBThrewPHI = nullptr;
1375   // PHI node for the loaded value of __threwValue global variable in
1376   // call.em.longjmp BB
1377   PHINode *CallEmLongjmpBBThrewValuePHI = nullptr;
1378   // rethrow.exn BB that will be shared within the function.
1379   BasicBlock *RethrowExnBB = nullptr;
1380 
1381   // Because we are creating new BBs while processing and don't want to make
1382   // all these newly created BBs candidates again for longjmp processing, we
1383   // first make the vector of candidate BBs.
1384   std::vector<BasicBlock *> BBs;
1385   for (BasicBlock &BB : F)
1386     BBs.push_back(&BB);
1387 
1388   // BBs.size() will change within the loop, so we query it every time
1389   for (unsigned I = 0; I < BBs.size(); I++) {
1390     BasicBlock *BB = BBs[I];
1391     for (Instruction &I : *BB) {
1392       if (isa<InvokeInst>(&I)) {
1393         std::string S;
1394         raw_string_ostream SS(S);
1395         SS << "In function " << F.getName()
1396            << ": When using Wasm EH with Emscripten SjLj, there is a "
1397               "restriction that `setjmp` function call and exception cannot be "
1398               "used within the same function:\n";
1399         SS << I;
1400         report_fatal_error(StringRef(SS.str()));
1401       }
1402       auto *CI = dyn_cast<CallInst>(&I);
1403       if (!CI)
1404         continue;
1405 
1406       const Value *Callee = CI->getCalledOperand();
1407       if (!canLongjmp(Callee))
1408         continue;
1409       if (isEmAsmCall(Callee))
1410         report_fatal_error("Cannot use EM_ASM* alongside setjmp/longjmp in " +
1411                                F.getName() +
1412                                ". Please consider using EM_JS, or move the "
1413                                "EM_ASM into another function.",
1414                            false);
1415 
1416       Value *Threw = nullptr;
1417       BasicBlock *Tail;
1418       if (Callee->getName().starts_with("__invoke_")) {
1419         // If invoke wrapper has already been generated for this call in
1420         // previous EH phase, search for the load instruction
1421         // %__THREW__.val = __THREW__;
1422         // in postamble after the invoke wrapper call
1423         LoadInst *ThrewLI = nullptr;
1424         StoreInst *ThrewResetSI = nullptr;
1425         for (auto I = std::next(BasicBlock::iterator(CI)), IE = BB->end();
1426              I != IE; ++I) {
1427           if (auto *LI = dyn_cast<LoadInst>(I))
1428             if (auto *GV = dyn_cast<GlobalVariable>(LI->getPointerOperand()))
1429               if (GV == ThrewGV) {
1430                 Threw = ThrewLI = LI;
1431                 break;
1432               }
1433         }
1434         // Search for the store instruction after the load above
1435         // __THREW__ = 0;
1436         for (auto I = std::next(BasicBlock::iterator(ThrewLI)), IE = BB->end();
1437              I != IE; ++I) {
1438           if (auto *SI = dyn_cast<StoreInst>(I)) {
1439             if (auto *GV = dyn_cast<GlobalVariable>(SI->getPointerOperand())) {
1440               if (GV == ThrewGV &&
1441                   SI->getValueOperand() == getAddrSizeInt(&M, 0)) {
1442                 ThrewResetSI = SI;
1443                 break;
1444               }
1445             }
1446           }
1447         }
1448         assert(Threw && ThrewLI && "Cannot find __THREW__ load after invoke");
1449         assert(ThrewResetSI && "Cannot find __THREW__ store after invoke");
1450         Tail = SplitBlock(BB, ThrewResetSI->getNextNode());
1451 
1452       } else {
1453         // Wrap call with invoke wrapper and generate preamble/postamble
1454         Threw = wrapInvoke(CI);
1455         ToErase.push_back(CI);
1456         Tail = SplitBlock(BB, CI->getNextNode());
1457 
1458         // If exception handling is enabled, the thrown value can be not a
1459         // longjmp but an exception, in which case we shouldn't silently ignore
1460         // exceptions; we should rethrow them.
1461         // __THREW__'s value is 0 when nothing happened, 1 when an exception is
1462         // thrown, other values when longjmp is thrown.
1463         //
1464         // if (%__THREW__.val == 1)
1465         //   goto %eh.rethrow
1466         // else
1467         //   goto %normal
1468         //
1469         // eh.rethrow: ;; Rethrow exception
1470         //   %exn = call @__cxa_find_matching_catch_2() ;; Retrieve thrown ptr
1471         //   __resumeException(%exn)
1472         //
1473         // normal:
1474         //   <-- Insertion point. Will insert sjlj handling code from here
1475         //   goto %tail
1476         //
1477         // tail:
1478         //   ...
1479         if (supportsException(&F) && canThrow(Callee)) {
1480           // We will add a new conditional branch. So remove the branch created
1481           // when we split the BB
1482           ToErase.push_back(BB->getTerminator());
1483 
1484           // Generate rethrow.exn BB once and share it within the function
1485           if (!RethrowExnBB) {
1486             RethrowExnBB = BasicBlock::Create(C, "rethrow.exn", &F);
1487             IRB.SetInsertPoint(RethrowExnBB);
1488             CallInst *Exn =
1489                 IRB.CreateCall(getFindMatchingCatch(M, 0), {}, "exn");
1490             IRB.CreateCall(ResumeF, {Exn});
1491             IRB.CreateUnreachable();
1492           }
1493 
1494           IRB.SetInsertPoint(CI);
1495           BasicBlock *NormalBB = BasicBlock::Create(C, "normal", &F);
1496           Value *CmpEqOne =
1497               IRB.CreateICmpEQ(Threw, getAddrSizeInt(&M, 1), "cmp.eq.one");
1498           IRB.CreateCondBr(CmpEqOne, RethrowExnBB, NormalBB);
1499 
1500           IRB.SetInsertPoint(NormalBB);
1501           IRB.CreateBr(Tail);
1502           BB = NormalBB; // New insertion point to insert __wasm_setjmp_test()
1503         }
1504       }
1505 
1506       // We need to replace the terminator in Tail - SplitBlock makes BB go
1507       // straight to Tail, we need to check if a longjmp occurred, and go to the
1508       // right setjmp-tail if so
1509       ToErase.push_back(BB->getTerminator());
1510 
1511       // Generate a function call to __wasm_setjmp_test function and
1512       // preamble/postamble code to figure out (1) whether longjmp
1513       // occurred (2) if longjmp occurred, which setjmp it corresponds to
1514       Value *Label = nullptr;
1515       Value *LongjmpResult = nullptr;
1516       BasicBlock *EndBB = nullptr;
1517       wrapTestSetjmp(BB, CI->getDebugLoc(), Threw, FunctionInvocationId, Label,
1518                      LongjmpResult, CallEmLongjmpBB, CallEmLongjmpBBThrewPHI,
1519                      CallEmLongjmpBBThrewValuePHI, EndBB);
1520       assert(Label && LongjmpResult && EndBB);
1521 
1522       // Create switch instruction
1523       IRB.SetInsertPoint(EndBB);
1524       IRB.SetCurrentDebugLocation(EndBB->back().getDebugLoc());
1525       SwitchInst *SI = IRB.CreateSwitch(Label, Tail, SetjmpRetPHIs.size());
1526       // -1 means no longjmp happened, continue normally (will hit the default
1527       // switch case). 0 means a longjmp that is not ours to handle, needs a
1528       // rethrow. Otherwise the index is the same as the index in P+1 (to avoid
1529       // 0).
1530       for (unsigned I = 0; I < SetjmpRetPHIs.size(); I++) {
1531         SI->addCase(IRB.getInt32(I + 1), SetjmpRetPHIs[I]->getParent());
1532         SetjmpRetPHIs[I]->addIncoming(LongjmpResult, EndBB);
1533       }
1534 
1535       // We are splitting the block here, and must continue to find other calls
1536       // in the block - which is now split. so continue to traverse in the Tail
1537       BBs.push_back(Tail);
1538     }
1539   }
1540 
1541   for (Instruction *I : ToErase)
1542     I->eraseFromParent();
1543 }
1544 
// Returns the unwind destination of the first cleanupret found among the users
// of \p CPI. Only that first cleanupret user is consulted. Returns nullptr when
// no user of \p CPI is a cleanupret; the result is whatever getUnwindDest()
// yields otherwise, which may itself be null (the caller below notes this
// happens when the cleanupret unwinds to the caller).
1545 static BasicBlock *getCleanupRetUnwindDest(const CleanupPadInst *CPI) {
1546   for (const User *U : CPI->users())
1547     if (const auto *CRI = dyn_cast<CleanupReturnInst>(U))
1548       return CRI->getUnwindDest();
1549   return nullptr;
1550 }
1551 
1552 // Create a catchpad in which we catch a longjmp's env and val arguments, test
1553 // if the longjmp corresponds to one of the setjmps in the current function, and
1554 // if so, jump to the setjmp dispatch BB from which we go to one of the
1555 // post-setjmp BBs. Refer to 4) of "Wasm setjmp/longjmp handling" section in the
1556 // comments at top of the file for details.
     //
     // \p F is the function being rewritten; its entry block must already end in
     // a branch to the original (split-off) entry block, because that branch is
     // redirected below.
     // \p FunctionInvocationId is forwarded as the second argument of the
     // __wasm_setjmp_test() call emitted in the catchpad.
     // \p SetjmpRetPHIs holds one PHI per post-setjmp BB. Entry I becomes switch
     // case I + 1 in setjmp.dispatch and gains the longjmp's 'val' as a new
     // incoming value from that block.
1557 void WebAssemblyLowerEmscriptenEHSjLj::handleLongjmpableCallsForWasmSjLj(
1558     Function &F, Instruction *FunctionInvocationId,
1559     SmallVectorImpl<PHINode *> &SetjmpRetPHIs) {
1560   Module &M = *F.getParent();
1561   LLVMContext &C = F.getContext();
1562   IRBuilder<> IRB(C);
1563 
1564   // A function with catchswitch/catchpad instruction should have a personality
1565   // function attached to it. Search for the wasm personality function, and if
1566   // it exists, use it, and if it doesn't, create a dummy personality function.
1567   // (SjLj is not going to call it anyway.)
1568   if (!F.hasPersonalityFn()) {
1569     StringRef PersName = getEHPersonalityName(EHPersonality::Wasm_CXX);
1570     FunctionType *PersType =
1571         FunctionType::get(IRB.getInt32Ty(), /* isVarArg */ true);
1572     Value *PersF = M.getOrInsertFunction(PersName, PersType).getCallee();
1573     F.setPersonalityFn(
1574         cast<Constant>(IRB.CreateBitCast(PersF, IRB.getPtrTy())));
1575   }
1576 
1577   // Use the entry BB's debugloc as a fallback
1578   BasicBlock *Entry = &F.getEntryBlock();
1579   DebugLoc FirstDL = getOrCreateDebugLoc(&*Entry->begin(), F.getSubprogram());
1580   IRB.SetCurrentDebugLocation(FirstDL);
1581 
1582   // Add setjmp.dispatch BB right after the entry block. Because we have
1583   // initialized functionInvocationId in the entry block and split the
1584   // rest into another BB, here 'OrigEntry' is the function's original entry
1585   // block before the transformation.
1586   //
1587   // entry:
1588   //   functionInvocationId initialization
1589   // setjmp.dispatch:
1590   //   switch will be inserted here later
1591   // entry.split: (OrigEntry)
1592   //   the original function starts here
1593   BasicBlock *OrigEntry = Entry->getNextNode();
1594   BasicBlock *SetjmpDispatchBB =
1595       BasicBlock::Create(C, "setjmp.dispatch", &F, OrigEntry);
       // The cast<> asserts that the entry block ends in a BranchInst; its first
       // successor is retargeted from OrigEntry to setjmp.dispatch.
1596   cast<BranchInst>(Entry->getTerminator())->setSuccessor(0, SetjmpDispatchBB);
1597 
1598   // Create catch.dispatch.longjmp BB and a catchswitch instruction
1599   BasicBlock *CatchDispatchLongjmpBB =
1600       BasicBlock::Create(C, "catch.dispatch.longjmp", &F);
1601   IRB.SetInsertPoint(CatchDispatchLongjmpBB);
       // Top-level catchswitch: parent pad is 'none', no unwind destination block
       // is given, and space for a single handler is reserved.
1602   CatchSwitchInst *CatchSwitchLongjmp =
1603       IRB.CreateCatchSwitch(ConstantTokenNone::get(C), nullptr, 1);
1604 
1605   // Create catch.longjmp BB and a catchpad instruction
1606   BasicBlock *CatchLongjmpBB = BasicBlock::Create(C, "catch.longjmp", &F);
1607   CatchSwitchLongjmp->addHandler(CatchLongjmpBB);
1608   IRB.SetInsertPoint(CatchLongjmpBB);
1609   CatchPadInst *CatchPad = IRB.CreateCatchPad(CatchSwitchLongjmp, {});
1610 
1611   // Wasm throw and catch instructions can throw and catch multiple values, but
1612   // that requires multivalue support in the toolchain, which is currently not
1613   // very reliable. We instead throw and catch a pointer to a struct value of
1614   // type 'struct __WasmLongjmpArgs', which is defined in Emscripten.
1615   Instruction *LongjmpArgs =
1616       IRB.CreateCall(CatchF, {IRB.getInt32(WebAssembly::C_LONGJMP)}, "thrown");
1617   Value *EnvField =
1618       IRB.CreateConstGEP2_32(LongjmpArgsTy, LongjmpArgs, 0, 0, "env_gep");
1619   Value *ValField =
1620       IRB.CreateConstGEP2_32(LongjmpArgsTy, LongjmpArgs, 0, 1, "val_gep");
1621   // void *env = __wasm_longjmp_args.env;
1622   Instruction *Env = IRB.CreateLoad(IRB.getPtrTy(), EnvField, "env");
1623   // int val = __wasm_longjmp_args.val;
1624   Instruction *Val = IRB.CreateLoad(IRB.getInt32Ty(), ValField, "val");
1625 
1626   // %label = __wasm_setjmp_test(%env, functionInvocationId);
1627   // if (%label == 0)
1628   //   __wasm_longjmp(%env, %val)
1629   // catchret to %setjmp.dispatch
1630   BasicBlock *ThenBB = BasicBlock::Create(C, "if.then", &F);
1631   BasicBlock *EndBB = BasicBlock::Create(C, "if.end", &F);
1632   Value *EnvP = IRB.CreateBitCast(Env, getAddrPtrType(&M), "env.p");
       // Calls emitted inside the catchpad carry a "funclet" operand bundle that
       // names the enclosing pad.
1633   Value *Label = IRB.CreateCall(WasmSetjmpTestF, {EnvP, FunctionInvocationId},
1634                                 OperandBundleDef("funclet", CatchPad), "label");
1635   Value *Cmp = IRB.CreateICmpEQ(Label, IRB.getInt32(0));
1636   IRB.CreateCondBr(Cmp, ThenBB, EndBB);
1637 
       // if.then: the longjmp does not target a setjmp of this function, so it is
       // thrown again. This call is remembered so that the conversion loop below
       // can leave it as a plain call.
1638   IRB.SetInsertPoint(ThenBB);
1639   CallInst *WasmLongjmpCI = IRB.CreateCall(
1640       WasmLongjmpF, {Env, Val}, OperandBundleDef("funclet", CatchPad));
1641   IRB.CreateUnreachable();
1642 
1643   IRB.SetInsertPoint(EndBB);
1644   // Jump to setjmp.dispatch block
1645   IRB.CreateCatchRet(CatchPad, SetjmpDispatchBB);
1646 
1647   // Go back to setjmp.dispatch BB
1648   // setjmp.dispatch:
1649   //   switch %label {
1650   //     label 1: goto post-setjmp BB 1
1651   //     label 2: goto post-setjmp BB 2
1652   //     ...
1653   //     default: goto the split-off next BB (OrigEntry)
1654   //   }
1655   IRB.SetInsertPoint(SetjmpDispatchBB);
1656   PHINode *LabelPHI = IRB.CreatePHI(IRB.getInt32Ty(), 2, "label.phi");
1657   LabelPHI->addIncoming(Label, EndBB);
1658   LabelPHI->addIncoming(IRB.getInt32(-1), Entry);
1659   SwitchInst *SI = IRB.CreateSwitch(LabelPHI, OrigEntry, SetjmpRetPHIs.size());
1660   // -1 means no longjmp happened, continue normally (will hit the default
1661   // switch case). 0 means a longjmp that is not ours to handle, needs a
1662   // rethrow. Otherwise the label is the index into SetjmpRetPHIs plus 1 (to
1663   // avoid 0).
1664   for (unsigned I = 0; I < SetjmpRetPHIs.size(); I++) {
1665     SI->addCase(IRB.getInt32(I + 1), SetjmpRetPHIs[I]->getParent());
1666     SetjmpRetPHIs[I]->addIncoming(Val, SetjmpDispatchBB);
1667   }
1668 
1669   // Convert all longjmpable call instructions to invokes that unwind to the
1670   // newly created catch.dispatch.longjmp BB.
       // The calls are only collected here; they are rewritten in a second loop
       // below, after this walk over the function's blocks has finished.
1671   SmallVector<CallInst *, 64> LongjmpableCalls;
1672   for (auto *BB = &*F.begin(); BB; BB = BB->getNextNode()) {
1673     for (auto &I : *BB) {
1674       auto *CI = dyn_cast<CallInst>(&I);
1675       if (!CI)
1676         continue;
1677       const Value *Callee = CI->getCalledOperand();
1678       if (!canLongjmp(Callee))
1679         continue;
1680       if (isEmAsmCall(Callee))
1681         report_fatal_error("Cannot use EM_ASM* alongside setjmp/longjmp in " +
1682                                F.getName() +
1683                                ". Please consider using EM_JS, or move the "
1684                                "EM_ASM into another function.",
1685                            false);
1686       // This is __wasm_longjmp() call we inserted in this function, which
1687       // rethrows the longjmp when the longjmp does not correspond to one of
1688       // setjmps in this function. We should not convert this call to an invoke.
1689       if (CI == WasmLongjmpCI)
1690         continue;
1691       LongjmpableCalls.push_back(CI);
1692     }
1693   }
1694 
       // Maps each unwind destination to the blocks that become its new
       // predecessors once the calls below are turned into invokes.
1695   SmallDenseMap<BasicBlock *, SmallSetVector<BasicBlock *, 4>, 4>
1696       UnwindDestToNewPreds;
1697   for (auto *CI : LongjmpableCalls) {
1698     // Even if the callee function has attribute 'nounwind', which is true for
1699     // all C functions, it can longjmp, which means it can throw a Wasm
1700     // exception now.
         // The attribute is dropped from the call site and, for direct calls, from
         // the callee function itself.
1701     CI->removeFnAttr(Attribute::NoUnwind);
1702     if (Function *CalleeF = CI->getCalledFunction())
1703       CalleeF->removeFnAttr(Attribute::NoUnwind);
1704 
1705     // Change it to an invoke and make it unwind to the catch.dispatch.longjmp
1706     // BB. If the call is enclosed in another catchpad/cleanuppad scope, unwind
1707     // to its parent pad's unwind destination instead to preserve the scope
1708     // structure. It will eventually unwind to the catch.dispatch.longjmp.
1709     BasicBlock *UnwindDest = nullptr;
1710     if (auto Bundle = CI->getOperandBundle(LLVMContext::OB_funclet)) {
1711       Instruction *FromPad = cast<Instruction>(Bundle->Inputs[0]);
           // Walk outwards through the enclosing pads until one of them supplies an
           // unwind destination or the outermost pad is reached.
1712       while (!UnwindDest) {
1713         if (auto *CPI = dyn_cast<CatchPadInst>(FromPad)) {
1714           UnwindDest = CPI->getCatchSwitch()->getUnwindDest();
1715           break;
1716         }
1717         if (auto *CPI = dyn_cast<CleanupPadInst>(FromPad)) {
1718           // getCleanupRetUnwindDest() can return nullptr when
1719           // 1. This cleanuppad's matching cleanupret unwinds to caller
1720           // 2. There is no matching cleanupret because it ends with
1721           //    unreachable.
1722           // In case of 2, we need to traverse the parent pad chain.
1723           UnwindDest = getCleanupRetUnwindDest(CPI);
1724           Value *ParentPad = CPI->getParentPad();
1725           if (isa<ConstantTokenNone>(ParentPad))
1726             break;
1727           FromPad = cast<Instruction>(ParentPad);
1728         }
1729       }
1730     }
         // No enclosing pad provided a destination: unwind to the longjmp handler.
1731     if (!UnwindDest)
1732       UnwindDest = CatchDispatchLongjmpBB;
1733     // Because we are changing a longjmpable call to an invoke, its unwind
1734     // destination can be an existing EH pad that already has phis, and the BB
1735     // with the newly created invoke will become a new predecessor of that EH
1736     // pad. In this case we need to add the new predecessor to those phis.
1737     UnwindDestToNewPreds[UnwindDest].insert(CI->getParent());
1738     changeToInvokeAndSplitBasicBlock(CI, UnwindDest);
1739   }
1740 
       // Redirect every catchswitch (other than the one created above) and every
       // cleanupret that currently unwinds to the caller so that it unwinds to
       // catch.dispatch.longjmp instead. The old instructions are queued in
       // ToErase and removed only after the loop over the function's blocks.
1741   SmallVector<Instruction *, 16> ToErase;
1742   for (auto &BB : F) {
1743     if (auto *CSI = dyn_cast<CatchSwitchInst>(BB.getFirstNonPHIIt())) {
1744       if (CSI != CatchSwitchLongjmp && CSI->unwindsToCaller()) {
1745         IRB.SetInsertPoint(CSI);
1746         ToErase.push_back(CSI);
1747         auto *NewCSI = IRB.CreateCatchSwitch(CSI->getParentPad(),
1748                                              CatchDispatchLongjmpBB, 1);
             // NOTE(review): only the first handler of the old catchswitch is
             // carried over; presumably catchswitches reaching this pass have
             // exactly one handler -- confirm.
1749         NewCSI->addHandler(*CSI->handler_begin());
1750         NewCSI->takeName(CSI);
1751         CSI->replaceAllUsesWith(NewCSI);
1752       }
1753     }
1754 
1755     if (auto *CRI = dyn_cast<CleanupReturnInst>(BB.getTerminator())) {
1756       if (CRI->unwindsToCaller()) {
1757         IRB.SetInsertPoint(CRI);
1758         ToErase.push_back(CRI);
1759         IRB.CreateCleanupRet(CRI->getCleanupPad(), CatchDispatchLongjmpBB);
1760       }
1761     }
1762   }
1763 
1764   for (Instruction *I : ToErase)
1765     I->eraseFromParent();
1766 
1767   // Add entries for new predecessors to phis in unwind destinations. We use
1768   // 'poison' as a placeholder value. We should make sure the phis have a valid
1769   // set of predecessors before running SSAUpdater, because SSAUpdater
1770   // internally can use existing phis to gather predecessor info rather than
1771   // scanning the actual CFG (See FindPredecessorBlocks in SSAUpdater.cpp for
1772   // details).
1773   for (auto &[UnwindDest, NewPreds] : UnwindDestToNewPreds) {
1774     for (PHINode &PN : UnwindDest->phis()) {
1775       for (auto *NewPred : NewPreds) {
1776         assert(PN.getBasicBlockIndex(NewPred) == -1);
1777         PN.addIncoming(PoisonValue::get(PN.getType()), NewPred);
1778       }
1779     }
1780   }
1781 
1782   // For unwind destinations for newly added invokes to longjmpable functions,
1783   // calculate incoming values for the newly added predecessors using
1784   // SSAUpdater. We add existing values in the phis to SSAUpdater as available
1785   // values and let it calculate what the value should be at the end of new
1786   // incoming blocks.
1787   for (auto &[UnwindDest, NewPreds] : UnwindDestToNewPreds) {
1788     for (PHINode &PN : UnwindDest->phis()) {
1789       SSAUpdater SSA;
1790       SSA.Initialize(PN.getType(), PN.getName());
1791       for (unsigned Idx = 0, E = PN.getNumIncomingValues(); Idx != E; ++Idx) {
             // Skip the poison placeholders added above for the new predecessors.
1792         if (NewPreds.contains(PN.getIncomingBlock(Idx)))
1793           continue;
1794         Value *V = PN.getIncomingValue(Idx);
             // A value produced by an invoke is registered as available in the
             // invoke's normal destination; any other instruction in its own
             // block; non-instruction values in the phi's incoming block.
1795         if (auto *II = dyn_cast<InvokeInst>(V))
1796           SSA.AddAvailableValue(II->getNormalDest(), II);
1797         else if (auto *I = dyn_cast<Instruction>(V))
1798           SSA.AddAvailableValue(I->getParent(), I);
1799         else
1800           SSA.AddAvailableValue(PN.getIncomingBlock(Idx), V);
1801       }
           // Replace each placeholder with the value SSAUpdater computes at the
           // end of the corresponding new predecessor block.
1802       for (auto *NewPred : NewPreds)
1803         PN.setIncomingValueForBlock(NewPred, SSA.GetValueAtEndOfBlock(NewPred));
1804       assert(PN.isComplete());
1805     }
1806   }
1807 }
1808