xref: /freebsd/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp (revision 19261079b74319502c6ffa1249920079f0f69a72)
1 //=- WebAssemblyISelLowering.cpp - WebAssembly DAG Lowering Implementation -==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements the WebAssemblyTargetLowering class.
11 ///
12 //===----------------------------------------------------------------------===//
13 
14 #include "WebAssemblyISelLowering.h"
15 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
16 #include "WebAssemblyMachineFunctionInfo.h"
17 #include "WebAssemblySubtarget.h"
18 #include "WebAssemblyTargetMachine.h"
19 #include "WebAssemblyUtilities.h"
20 #include "llvm/CodeGen/Analysis.h"
21 #include "llvm/CodeGen/CallingConvLower.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineJumpTableInfo.h"
24 #include "llvm/CodeGen/MachineModuleInfo.h"
25 #include "llvm/CodeGen/MachineRegisterInfo.h"
26 #include "llvm/CodeGen/SelectionDAG.h"
27 #include "llvm/CodeGen/WasmEHFuncInfo.h"
28 #include "llvm/IR/DiagnosticInfo.h"
29 #include "llvm/IR/DiagnosticPrinter.h"
30 #include "llvm/IR/Function.h"
31 #include "llvm/IR/Intrinsics.h"
32 #include "llvm/IR/IntrinsicsWebAssembly.h"
33 #include "llvm/Support/Debug.h"
34 #include "llvm/Support/ErrorHandling.h"
35 #include "llvm/Support/MathExtras.h"
36 #include "llvm/Support/raw_ostream.h"
37 #include "llvm/Target/TargetOptions.h"
38 using namespace llvm;
39 
40 #define DEBUG_TYPE "wasm-lower"
41 
42 WebAssemblyTargetLowering::WebAssemblyTargetLowering(
43     const TargetMachine &TM, const WebAssemblySubtarget &STI)
44     : TargetLowering(TM), Subtarget(&STI) {
45   auto MVTPtr = Subtarget->hasAddr64() ? MVT::i64 : MVT::i32;
46 
47   // Booleans always contain 0 or 1.
48   setBooleanContents(ZeroOrOneBooleanContent);
49   // Except in SIMD vectors
50   setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
51   // We don't know the microarchitecture here, so just reduce register pressure.
52   setSchedulingPreference(Sched::RegPressure);
53   // Tell ISel that we have a stack pointer.
54   setStackPointerRegisterToSaveRestore(
55       Subtarget->hasAddr64() ? WebAssembly::SP64 : WebAssembly::SP32);
56   // Set up the register classes.
57   addRegisterClass(MVT::i32, &WebAssembly::I32RegClass);
58   addRegisterClass(MVT::i64, &WebAssembly::I64RegClass);
59   addRegisterClass(MVT::f32, &WebAssembly::F32RegClass);
60   addRegisterClass(MVT::f64, &WebAssembly::F64RegClass);
61   if (Subtarget->hasSIMD128()) {
62     addRegisterClass(MVT::v16i8, &WebAssembly::V128RegClass);
63     addRegisterClass(MVT::v8i16, &WebAssembly::V128RegClass);
64     addRegisterClass(MVT::v4i32, &WebAssembly::V128RegClass);
65     addRegisterClass(MVT::v4f32, &WebAssembly::V128RegClass);
66     addRegisterClass(MVT::v2i64, &WebAssembly::V128RegClass);
67     addRegisterClass(MVT::v2f64, &WebAssembly::V128RegClass);
68   }
69   // Compute derived properties from the register classes.
70   computeRegisterProperties(Subtarget->getRegisterInfo());
71 
72   setOperationAction(ISD::GlobalAddress, MVTPtr, Custom);
73   setOperationAction(ISD::GlobalTLSAddress, MVTPtr, Custom);
74   setOperationAction(ISD::ExternalSymbol, MVTPtr, Custom);
75   setOperationAction(ISD::JumpTable, MVTPtr, Custom);
76   setOperationAction(ISD::BlockAddress, MVTPtr, Custom);
77   setOperationAction(ISD::BRIND, MVT::Other, Custom);
78 
79   // Take the default expansion for va_arg, va_copy, and va_end. There is no
80   // default action for va_start, so we do that custom.
81   setOperationAction(ISD::VASTART, MVT::Other, Custom);
82   setOperationAction(ISD::VAARG, MVT::Other, Expand);
83   setOperationAction(ISD::VACOPY, MVT::Other, Expand);
84   setOperationAction(ISD::VAEND, MVT::Other, Expand);
85 
86   for (auto T : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
87     // Don't expand the floating-point types to constant pools.
88     setOperationAction(ISD::ConstantFP, T, Legal);
89     // Expand floating-point comparisons.
90     for (auto CC : {ISD::SETO, ISD::SETUO, ISD::SETUEQ, ISD::SETONE,
91                     ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE})
92       setCondCodeAction(CC, T, Expand);
93     // Expand floating-point library function operators.
94     for (auto Op :
95          {ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM, ISD::FMA})
96       setOperationAction(Op, T, Expand);
97     // Note supported floating-point library function operators that otherwise
98     // default to expand.
99     for (auto Op :
100          {ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FNEARBYINT, ISD::FRINT})
101       setOperationAction(Op, T, Legal);
102     // Support minimum and maximum, which otherwise default to expand.
103     setOperationAction(ISD::FMINIMUM, T, Legal);
104     setOperationAction(ISD::FMAXIMUM, T, Legal);
105     // WebAssembly currently has no builtin f16 support.
106     setOperationAction(ISD::FP16_TO_FP, T, Expand);
107     setOperationAction(ISD::FP_TO_FP16, T, Expand);
108     setLoadExtAction(ISD::EXTLOAD, T, MVT::f16, Expand);
109     setTruncStoreAction(T, MVT::f16, Expand);
110   }
111 
112   // Expand unavailable integer operations.
113   for (auto Op :
114        {ISD::BSWAP, ISD::SMUL_LOHI, ISD::UMUL_LOHI, ISD::MULHS, ISD::MULHU,
115         ISD::SDIVREM, ISD::UDIVREM, ISD::SHL_PARTS, ISD::SRA_PARTS,
116         ISD::SRL_PARTS, ISD::ADDC, ISD::ADDE, ISD::SUBC, ISD::SUBE}) {
117     for (auto T : {MVT::i32, MVT::i64})
118       setOperationAction(Op, T, Expand);
119     if (Subtarget->hasSIMD128())
120       for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
121         setOperationAction(Op, T, Expand);
122   }
123 
124   // SIMD-specific configuration
125   if (Subtarget->hasSIMD128()) {
126     // Hoist bitcasts out of shuffles
127     setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
128 
129     // Combine extends of extract_subvectors into widening ops
130     setTargetDAGCombine(ISD::SIGN_EXTEND);
131     setTargetDAGCombine(ISD::ZERO_EXTEND);
132 
133     // Support saturating add for i8x16 and i16x8
134     for (auto Op : {ISD::SADDSAT, ISD::UADDSAT})
135       for (auto T : {MVT::v16i8, MVT::v8i16})
136         setOperationAction(Op, T, Legal);
137 
138     // Support integer abs
139     for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
140       setOperationAction(ISD::ABS, T, Legal);
141 
142     // Custom lower BUILD_VECTORs to minimize number of replace_lanes
143     for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
144                    MVT::v2f64})
145       setOperationAction(ISD::BUILD_VECTOR, T, Custom);
146 
147     // We have custom shuffle lowering to expose the shuffle mask
148     for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
149                    MVT::v2f64})
150       setOperationAction(ISD::VECTOR_SHUFFLE, T, Custom);
151 
152     // Custom lowering since wasm shifts must have a scalar shift amount
153     for (auto Op : {ISD::SHL, ISD::SRA, ISD::SRL})
154       for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
155         setOperationAction(Op, T, Custom);
156 
157     // Custom lower lane accesses to expand out variable indices
158     for (auto Op : {ISD::EXTRACT_VECTOR_ELT, ISD::INSERT_VECTOR_ELT})
159       for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
160                      MVT::v2f64})
161         setOperationAction(Op, T, Custom);
162 
163     // There is no i8x16.mul instruction
164     setOperationAction(ISD::MUL, MVT::v16i8, Expand);
165 
166     // There is no vector conditional select instruction
167     for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
168                    MVT::v2f64})
169       setOperationAction(ISD::SELECT_CC, T, Expand);
170 
171     // Expand integer operations supported for scalars but not SIMD
172     for (auto Op : {ISD::CTLZ, ISD::CTTZ, ISD::CTPOP, ISD::SDIV, ISD::UDIV,
173                     ISD::SREM, ISD::UREM, ISD::ROTL, ISD::ROTR})
174       for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
175         setOperationAction(Op, T, Expand);
176 
177     // But we do have integer min and max operations
178     for (auto Op : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
179       for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
180         setOperationAction(Op, T, Legal);
181 
182     // Expand float operations supported for scalars but not SIMD
183     for (auto Op : {ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FNEARBYINT,
184                     ISD::FCOPYSIGN, ISD::FLOG, ISD::FLOG2, ISD::FLOG10,
185                     ISD::FEXP, ISD::FEXP2, ISD::FRINT})
186       for (auto T : {MVT::v4f32, MVT::v2f64})
187         setOperationAction(Op, T, Expand);
188 
189     // Expand operations not supported for i64x2 vectors
190     for (unsigned CC = 0; CC < ISD::SETCC_INVALID; ++CC)
191       setCondCodeAction(static_cast<ISD::CondCode>(CC), MVT::v2i64, Custom);
192 
193     // 64x2 conversions are not in the spec
194     for (auto Op :
195          {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT})
196       for (auto T : {MVT::v2i64, MVT::v2f64})
197         setOperationAction(Op, T, Expand);
198   }
199 
200   // As a special case, these operators use the type to mean the type to
201   // sign-extend from.
202   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
203   if (!Subtarget->hasSignExt()) {
204     // Sign extends are legal only when extending a vector extract
205     auto Action = Subtarget->hasSIMD128() ? Custom : Expand;
206     for (auto T : {MVT::i8, MVT::i16, MVT::i32})
207       setOperationAction(ISD::SIGN_EXTEND_INREG, T, Action);
208   }
209   for (auto T : MVT::integer_fixedlen_vector_valuetypes())
210     setOperationAction(ISD::SIGN_EXTEND_INREG, T, Expand);
211 
212   // Dynamic stack allocation: use the default expansion.
213   setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
214   setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
215   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVTPtr, Expand);
216 
217   setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
218   setOperationAction(ISD::FrameIndex, MVT::i64, Custom);
219   setOperationAction(ISD::CopyToReg, MVT::Other, Custom);
220 
221   // Expand these forms; we pattern-match the forms that we can handle in isel.
222   for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64})
223     for (auto Op : {ISD::BR_CC, ISD::SELECT_CC})
224       setOperationAction(Op, T, Expand);
225 
226   // We have custom switch handling.
227   setOperationAction(ISD::BR_JT, MVT::Other, Custom);
228 
229   // WebAssembly doesn't have:
230   //  - Floating-point extending loads.
231   //  - Floating-point truncating stores.
232   //  - i1 extending loads.
233   //  - truncating SIMD stores and most extending loads
234   setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
235   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
236   for (auto T : MVT::integer_valuetypes())
237     for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
238       setLoadExtAction(Ext, T, MVT::i1, Promote);
239   if (Subtarget->hasSIMD128()) {
240     for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32,
241                    MVT::v2f64}) {
242       for (auto MemT : MVT::fixedlen_vector_valuetypes()) {
243         if (MVT(T) != MemT) {
244           setTruncStoreAction(T, MemT, Expand);
245           for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
246             setLoadExtAction(Ext, T, MemT, Expand);
247         }
248       }
249     }
250     // But some vector extending loads are legal
251     for (auto Ext : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}) {
252       setLoadExtAction(Ext, MVT::v8i16, MVT::v8i8, Legal);
253       setLoadExtAction(Ext, MVT::v4i32, MVT::v4i16, Legal);
254       setLoadExtAction(Ext, MVT::v2i64, MVT::v2i32, Legal);
255     }
256     // And some truncating stores are legal as well
257     setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
258     setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
259   }
260 
261   // Don't do anything clever with build_pairs
262   setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
263 
264   // Trap lowers to wasm unreachable
265   setOperationAction(ISD::TRAP, MVT::Other, Legal);
266   setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
267 
268   // Exception handling intrinsics
269   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
270   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
271   setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
272 
273   setMaxAtomicSizeInBitsSupported(64);
274 
275   // Override the __gnu_f2h_ieee/__gnu_h2f_ieee names so that the f32 name is
276   // consistent with the f64 and f128 names.
277   setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
278   setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
279 
280   // Define the emscripten name for return address helper.
281   // TODO: when implementing other Wasm backends, make this generic or only do
282   // this on emscripten depending on what they end up doing.
283   setLibcallName(RTLIB::RETURN_ADDRESS, "emscripten_return_address");
284 
285   // Always convert switches to br_tables unless there is only one case, which
286   // is equivalent to a simple branch. This reduces code size for wasm, and we
287   // defer possible jump table optimizations to the VM.
288   setMinimumJumpTableEntries(2);
289 }
290 
291 TargetLowering::AtomicExpansionKind
292 WebAssemblyTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
293   // We have wasm instructions for these
294   switch (AI->getOperation()) {
295   case AtomicRMWInst::Add:
296   case AtomicRMWInst::Sub:
297   case AtomicRMWInst::And:
298   case AtomicRMWInst::Or:
299   case AtomicRMWInst::Xor:
300   case AtomicRMWInst::Xchg:
301     return AtomicExpansionKind::None;
302   default:
303     break;
304   }
305   return AtomicExpansionKind::CmpXChg;
306 }
307 
308 FastISel *WebAssemblyTargetLowering::createFastISel(
309     FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const {
310   return WebAssembly::createFastISel(FuncInfo, LibInfo);
311 }
312 
313 MVT WebAssemblyTargetLowering::getScalarShiftAmountTy(const DataLayout & /*DL*/,
314                                                       EVT VT) const {
315   unsigned BitWidth = NextPowerOf2(VT.getSizeInBits() - 1);
316   if (BitWidth > 1 && BitWidth < 8)
317     BitWidth = 8;
318 
319   if (BitWidth > 64) {
320     // The shift will be lowered to a libcall, and compiler-rt libcalls expect
321     // the count to be an i32.
322     BitWidth = 32;
323     assert(BitWidth >= Log2_32_Ceil(VT.getSizeInBits()) &&
324            "32-bit shift counts ought to be enough for anyone");
325   }
326 
327   MVT Result = MVT::getIntegerVT(BitWidth);
328   assert(Result != MVT::INVALID_SIMPLE_VALUE_TYPE &&
329          "Unable to represent scalar shift amount type");
330   return Result;
331 }
332 
333 // Lower an fp-to-int conversion operator from the LLVM opcode, which has an
334 // undefined result on invalid/overflow, to the WebAssembly opcode, which
335 // traps on invalid/overflow.
336 static MachineBasicBlock *LowerFPToInt(MachineInstr &MI, DebugLoc DL,
337                                        MachineBasicBlock *BB,
338                                        const TargetInstrInfo &TII,
339                                        bool IsUnsigned, bool Int64,
340                                        bool Float64, unsigned LoweredOpcode) {
341   MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
342 
343   Register OutReg = MI.getOperand(0).getReg();
344   Register InReg = MI.getOperand(1).getReg();
345 
346   unsigned Abs = Float64 ? WebAssembly::ABS_F64 : WebAssembly::ABS_F32;
347   unsigned FConst = Float64 ? WebAssembly::CONST_F64 : WebAssembly::CONST_F32;
348   unsigned LT = Float64 ? WebAssembly::LT_F64 : WebAssembly::LT_F32;
349   unsigned GE = Float64 ? WebAssembly::GE_F64 : WebAssembly::GE_F32;
350   unsigned IConst = Int64 ? WebAssembly::CONST_I64 : WebAssembly::CONST_I32;
351   unsigned Eqz = WebAssembly::EQZ_I32;
352   unsigned And = WebAssembly::AND_I32;
353   int64_t Limit = Int64 ? INT64_MIN : INT32_MIN;
354   int64_t Substitute = IsUnsigned ? 0 : Limit;
355   double CmpVal = IsUnsigned ? -(double)Limit * 2.0 : -(double)Limit;
356   auto &Context = BB->getParent()->getFunction().getContext();
357   Type *Ty = Float64 ? Type::getDoubleTy(Context) : Type::getFloatTy(Context);
358 
359   const BasicBlock *LLVMBB = BB->getBasicBlock();
360   MachineFunction *F = BB->getParent();
361   MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB);
362   MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(LLVMBB);
363   MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB);
364 
365   MachineFunction::iterator It = ++BB->getIterator();
366   F->insert(It, FalseMBB);
367   F->insert(It, TrueMBB);
368   F->insert(It, DoneMBB);
369 
370   // Transfer the remainder of BB and its successor edges to DoneMBB.
371   DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end());
372   DoneMBB->transferSuccessorsAndUpdatePHIs(BB);
373 
374   BB->addSuccessor(TrueMBB);
375   BB->addSuccessor(FalseMBB);
376   TrueMBB->addSuccessor(DoneMBB);
377   FalseMBB->addSuccessor(DoneMBB);
378 
379   unsigned Tmp0, Tmp1, CmpReg, EqzReg, FalseReg, TrueReg;
380   Tmp0 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
381   Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
382   CmpReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
383   EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
384   FalseReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg));
385   TrueReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg));
386 
387   MI.eraseFromParent();
388   // For signed numbers, we can do a single comparison to determine whether
389   // fabs(x) is within range.
390   if (IsUnsigned) {
391     Tmp0 = InReg;
392   } else {
393     BuildMI(BB, DL, TII.get(Abs), Tmp0).addReg(InReg);
394   }
395   BuildMI(BB, DL, TII.get(FConst), Tmp1)
396       .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, CmpVal)));
397   BuildMI(BB, DL, TII.get(LT), CmpReg).addReg(Tmp0).addReg(Tmp1);
398 
399   // For unsigned numbers, we have to do a separate comparison with zero.
400   if (IsUnsigned) {
401     Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
402     Register SecondCmpReg =
403         MRI.createVirtualRegister(&WebAssembly::I32RegClass);
404     Register AndReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
405     BuildMI(BB, DL, TII.get(FConst), Tmp1)
406         .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, 0.0)));
407     BuildMI(BB, DL, TII.get(GE), SecondCmpReg).addReg(Tmp0).addReg(Tmp1);
408     BuildMI(BB, DL, TII.get(And), AndReg).addReg(CmpReg).addReg(SecondCmpReg);
409     CmpReg = AndReg;
410   }
411 
412   BuildMI(BB, DL, TII.get(Eqz), EqzReg).addReg(CmpReg);
413 
414   // Create the CFG diamond to select between doing the conversion or using
415   // the substitute value.
416   BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(TrueMBB).addReg(EqzReg);
417   BuildMI(FalseMBB, DL, TII.get(LoweredOpcode), FalseReg).addReg(InReg);
418   BuildMI(FalseMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB);
419   BuildMI(TrueMBB, DL, TII.get(IConst), TrueReg).addImm(Substitute);
420   BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(TargetOpcode::PHI), OutReg)
421       .addReg(FalseReg)
422       .addMBB(FalseMBB)
423       .addReg(TrueReg)
424       .addMBB(TrueMBB);
425 
426   return DoneMBB;
427 }
428 
429 static MachineBasicBlock *LowerCallResults(MachineInstr &CallResults,
430                                            DebugLoc DL, MachineBasicBlock *BB,
431                                            const TargetInstrInfo &TII) {
432   MachineInstr &CallParams = *CallResults.getPrevNode();
433   assert(CallParams.getOpcode() == WebAssembly::CALL_PARAMS);
434   assert(CallResults.getOpcode() == WebAssembly::CALL_RESULTS ||
435          CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS);
436 
437   bool IsIndirect = CallParams.getOperand(0).isReg();
438   bool IsRetCall = CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS;
439 
440   unsigned CallOp;
441   if (IsIndirect && IsRetCall) {
442     CallOp = WebAssembly::RET_CALL_INDIRECT;
443   } else if (IsIndirect) {
444     CallOp = WebAssembly::CALL_INDIRECT;
445   } else if (IsRetCall) {
446     CallOp = WebAssembly::RET_CALL;
447   } else {
448     CallOp = WebAssembly::CALL;
449   }
450 
451   MachineFunction &MF = *BB->getParent();
452   const MCInstrDesc &MCID = TII.get(CallOp);
453   MachineInstrBuilder MIB(MF, MF.CreateMachineInstr(MCID, DL));
454 
455   // See if we must truncate the function pointer.
456   // CALL_INDIRECT takes an i32, but in wasm64 we represent function pointers
457   // as 64-bit for uniformity with other pointer types.
458   if (IsIndirect && MF.getSubtarget<WebAssemblySubtarget>().hasAddr64()) {
459     Register Reg32 =
460         MF.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass);
461     auto &FnPtr = CallParams.getOperand(0);
462     BuildMI(*BB, CallResults.getIterator(), DL,
463             TII.get(WebAssembly::I32_WRAP_I64), Reg32)
464         .addReg(FnPtr.getReg());
465     FnPtr.setReg(Reg32);
466   }
467 
468   // Move the function pointer to the end of the arguments for indirect calls
469   if (IsIndirect) {
470     auto FnPtr = CallParams.getOperand(0);
471     CallParams.RemoveOperand(0);
472     CallParams.addOperand(FnPtr);
473   }
474 
475   for (auto Def : CallResults.defs())
476     MIB.add(Def);
477 
478   // Add placeholders for the type index and immediate flags
479   if (IsIndirect) {
480     MIB.addImm(0);
481     MIB.addImm(0);
482 
483     // Ensure that the object file has a __indirect_function_table import, as we
484     // call_indirect against it.
485     MCSymbolWasm *Sym = WebAssembly::getOrCreateFunctionTableSymbol(
486         MF.getContext(), "__indirect_function_table");
487     // Until call_indirect emits TABLE_NUMBER relocs against this symbol, mark
488     // it as NO_STRIP so as to ensure that the indirect function table makes it
489     // to linked output.
490     Sym->setNoStrip();
491   }
492 
493   for (auto Use : CallParams.uses())
494     MIB.add(Use);
495 
496   BB->insert(CallResults.getIterator(), MIB);
497   CallParams.eraseFromParent();
498   CallResults.eraseFromParent();
499 
500   return BB;
501 }
502 
503 MachineBasicBlock *WebAssemblyTargetLowering::EmitInstrWithCustomInserter(
504     MachineInstr &MI, MachineBasicBlock *BB) const {
505   const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
506   DebugLoc DL = MI.getDebugLoc();
507 
508   switch (MI.getOpcode()) {
509   default:
510     llvm_unreachable("Unexpected instr type to insert");
511   case WebAssembly::FP_TO_SINT_I32_F32:
512     return LowerFPToInt(MI, DL, BB, TII, false, false, false,
513                         WebAssembly::I32_TRUNC_S_F32);
514   case WebAssembly::FP_TO_UINT_I32_F32:
515     return LowerFPToInt(MI, DL, BB, TII, true, false, false,
516                         WebAssembly::I32_TRUNC_U_F32);
517   case WebAssembly::FP_TO_SINT_I64_F32:
518     return LowerFPToInt(MI, DL, BB, TII, false, true, false,
519                         WebAssembly::I64_TRUNC_S_F32);
520   case WebAssembly::FP_TO_UINT_I64_F32:
521     return LowerFPToInt(MI, DL, BB, TII, true, true, false,
522                         WebAssembly::I64_TRUNC_U_F32);
523   case WebAssembly::FP_TO_SINT_I32_F64:
524     return LowerFPToInt(MI, DL, BB, TII, false, false, true,
525                         WebAssembly::I32_TRUNC_S_F64);
526   case WebAssembly::FP_TO_UINT_I32_F64:
527     return LowerFPToInt(MI, DL, BB, TII, true, false, true,
528                         WebAssembly::I32_TRUNC_U_F64);
529   case WebAssembly::FP_TO_SINT_I64_F64:
530     return LowerFPToInt(MI, DL, BB, TII, false, true, true,
531                         WebAssembly::I64_TRUNC_S_F64);
532   case WebAssembly::FP_TO_UINT_I64_F64:
533     return LowerFPToInt(MI, DL, BB, TII, true, true, true,
534                         WebAssembly::I64_TRUNC_U_F64);
535   case WebAssembly::CALL_RESULTS:
536   case WebAssembly::RET_CALL_RESULTS:
537     return LowerCallResults(MI, DL, BB, TII);
538   }
539 }
540 
541 const char *
542 WebAssemblyTargetLowering::getTargetNodeName(unsigned Opcode) const {
543   switch (static_cast<WebAssemblyISD::NodeType>(Opcode)) {
544   case WebAssemblyISD::FIRST_NUMBER:
545   case WebAssemblyISD::FIRST_MEM_OPCODE:
546     break;
547 #define HANDLE_NODETYPE(NODE)                                                  \
548   case WebAssemblyISD::NODE:                                                   \
549     return "WebAssemblyISD::" #NODE;
550 #define HANDLE_MEM_NODETYPE(NODE) HANDLE_NODETYPE(NODE)
551 #include "WebAssemblyISD.def"
552 #undef HANDLE_MEM_NODETYPE
553 #undef HANDLE_NODETYPE
554   }
555   return nullptr;
556 }
557 
558 std::pair<unsigned, const TargetRegisterClass *>
559 WebAssemblyTargetLowering::getRegForInlineAsmConstraint(
560     const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
561   // First, see if this is a constraint that directly corresponds to a
562   // WebAssembly register class.
563   if (Constraint.size() == 1) {
564     switch (Constraint[0]) {
565     case 'r':
566       assert(VT != MVT::iPTR && "Pointer MVT not expected here");
567       if (Subtarget->hasSIMD128() && VT.isVector()) {
568         if (VT.getSizeInBits() == 128)
569           return std::make_pair(0U, &WebAssembly::V128RegClass);
570       }
571       if (VT.isInteger() && !VT.isVector()) {
572         if (VT.getSizeInBits() <= 32)
573           return std::make_pair(0U, &WebAssembly::I32RegClass);
574         if (VT.getSizeInBits() <= 64)
575           return std::make_pair(0U, &WebAssembly::I64RegClass);
576       }
577       if (VT.isFloatingPoint() && !VT.isVector()) {
578         switch (VT.getSizeInBits()) {
579         case 32:
580           return std::make_pair(0U, &WebAssembly::F32RegClass);
581         case 64:
582           return std::make_pair(0U, &WebAssembly::F64RegClass);
583         default:
584           break;
585         }
586       }
587       break;
588     default:
589       break;
590     }
591   }
592 
593   return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
594 }
595 
596 bool WebAssemblyTargetLowering::isCheapToSpeculateCttz() const {
597   // Assume ctz is a relatively cheap operation.
598   return true;
599 }
600 
601 bool WebAssemblyTargetLowering::isCheapToSpeculateCtlz() const {
602   // Assume clz is a relatively cheap operation.
603   return true;
604 }
605 
606 bool WebAssemblyTargetLowering::isLegalAddressingMode(const DataLayout &DL,
607                                                       const AddrMode &AM,
608                                                       Type *Ty, unsigned AS,
609                                                       Instruction *I) const {
610   // WebAssembly offsets are added as unsigned without wrapping. The
611   // isLegalAddressingMode gives us no way to determine if wrapping could be
612   // happening, so we approximate this by accepting only non-negative offsets.
613   if (AM.BaseOffs < 0)
614     return false;
615 
616   // WebAssembly has no scale register operands.
617   if (AM.Scale != 0)
618     return false;
619 
620   // Everything else is legal.
621   return true;
622 }
623 
624 bool WebAssemblyTargetLowering::allowsMisalignedMemoryAccesses(
625     EVT /*VT*/, unsigned /*AddrSpace*/, unsigned /*Align*/,
626     MachineMemOperand::Flags /*Flags*/, bool *Fast) const {
627   // WebAssembly supports unaligned accesses, though it should be declared
628   // with the p2align attribute on loads and stores which do so, and there
629   // may be a performance impact. We tell LLVM they're "fast" because
630   // for the kinds of things that LLVM uses this for (merging adjacent stores
631   // of constants, etc.), WebAssembly implementations will either want the
632   // unaligned access or they'll split anyway.
633   if (Fast)
634     *Fast = true;
635   return true;
636 }
637 
638 bool WebAssemblyTargetLowering::isIntDivCheap(EVT VT,
639                                               AttributeList Attr) const {
640   // The current thinking is that wasm engines will perform this optimization,
641   // so we can save on code size.
642   return true;
643 }
644 
645 bool WebAssemblyTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
646   EVT ExtT = ExtVal.getValueType();
647   EVT MemT = cast<LoadSDNode>(ExtVal->getOperand(0))->getValueType(0);
648   return (ExtT == MVT::v8i16 && MemT == MVT::v8i8) ||
649          (ExtT == MVT::v4i32 && MemT == MVT::v4i16) ||
650          (ExtT == MVT::v2i64 && MemT == MVT::v2i32);
651 }
652 
653 EVT WebAssemblyTargetLowering::getSetCCResultType(const DataLayout &DL,
654                                                   LLVMContext &C,
655                                                   EVT VT) const {
656   if (VT.isVector())
657     return VT.changeVectorElementTypeToInteger();
658 
659   // So far, all branch instructions in Wasm take an I32 condition.
660   // The default TargetLowering::getSetCCResultType returns the pointer size,
661   // which would be useful to reduce instruction counts when testing
662   // against 64-bit pointers/values if at some point Wasm supports that.
663   return EVT::getIntegerVT(C, 32);
664 }
665 
666 bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
667                                                    const CallInst &I,
668                                                    MachineFunction &MF,
669                                                    unsigned Intrinsic) const {
670   switch (Intrinsic) {
671   case Intrinsic::wasm_memory_atomic_notify:
672     Info.opc = ISD::INTRINSIC_W_CHAIN;
673     Info.memVT = MVT::i32;
674     Info.ptrVal = I.getArgOperand(0);
675     Info.offset = 0;
676     Info.align = Align(4);
677     // atomic.notify instruction does not really load the memory specified with
678     // this argument, but MachineMemOperand should either be load or store, so
679     // we set this to a load.
680     // FIXME Volatile isn't really correct, but currently all LLVM atomic
681     // instructions are treated as volatiles in the backend, so we should be
682     // consistent. The same applies for wasm_atomic_wait intrinsics too.
683     Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
684     return true;
685   case Intrinsic::wasm_memory_atomic_wait32:
686     Info.opc = ISD::INTRINSIC_W_CHAIN;
687     Info.memVT = MVT::i32;
688     Info.ptrVal = I.getArgOperand(0);
689     Info.offset = 0;
690     Info.align = Align(4);
691     Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
692     return true;
693   case Intrinsic::wasm_memory_atomic_wait64:
694     Info.opc = ISD::INTRINSIC_W_CHAIN;
695     Info.memVT = MVT::i64;
696     Info.ptrVal = I.getArgOperand(0);
697     Info.offset = 0;
698     Info.align = Align(8);
699     Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
700     return true;
701   case Intrinsic::wasm_load32_zero:
702   case Intrinsic::wasm_load64_zero:
703     Info.opc = ISD::INTRINSIC_W_CHAIN;
704     Info.memVT = Intrinsic == Intrinsic::wasm_load32_zero ? MVT::i32 : MVT::i64;
705     Info.ptrVal = I.getArgOperand(0);
706     Info.offset = 0;
707     Info.align = Info.memVT == MVT::i32 ? Align(4) : Align(8);
708     Info.flags = MachineMemOperand::MOLoad;
709     return true;
710   case Intrinsic::wasm_load8_lane:
711   case Intrinsic::wasm_load16_lane:
712   case Intrinsic::wasm_load32_lane:
713   case Intrinsic::wasm_load64_lane:
714   case Intrinsic::wasm_store8_lane:
715   case Intrinsic::wasm_store16_lane:
716   case Intrinsic::wasm_store32_lane:
717   case Intrinsic::wasm_store64_lane: {
718     MVT MemVT;
719     Align MemAlign;
720     switch (Intrinsic) {
721     case Intrinsic::wasm_load8_lane:
722     case Intrinsic::wasm_store8_lane:
723       MemVT = MVT::i8;
724       MemAlign = Align(1);
725       break;
726     case Intrinsic::wasm_load16_lane:
727     case Intrinsic::wasm_store16_lane:
728       MemVT = MVT::i16;
729       MemAlign = Align(2);
730       break;
731     case Intrinsic::wasm_load32_lane:
732     case Intrinsic::wasm_store32_lane:
733       MemVT = MVT::i32;
734       MemAlign = Align(4);
735       break;
736     case Intrinsic::wasm_load64_lane:
737     case Intrinsic::wasm_store64_lane:
738       MemVT = MVT::i64;
739       MemAlign = Align(8);
740       break;
741     default:
742       llvm_unreachable("unexpected intrinsic");
743     }
744     if (Intrinsic == Intrinsic::wasm_load8_lane ||
745         Intrinsic == Intrinsic::wasm_load16_lane ||
746         Intrinsic == Intrinsic::wasm_load32_lane ||
747         Intrinsic == Intrinsic::wasm_load64_lane) {
748       Info.opc = ISD::INTRINSIC_W_CHAIN;
749       Info.flags = MachineMemOperand::MOLoad;
750     } else {
751       Info.opc = ISD::INTRINSIC_VOID;
752       Info.flags = MachineMemOperand::MOStore;
753     }
754     Info.ptrVal = I.getArgOperand(0);
755     Info.memVT = MemVT;
756     Info.offset = 0;
757     Info.align = MemAlign;
758     return true;
759   }
760   case Intrinsic::wasm_prefetch_t:
761   case Intrinsic::wasm_prefetch_nt: {
762     Info.opc = ISD::INTRINSIC_VOID;
763     Info.memVT = MVT::i8;
764     Info.ptrVal = I.getArgOperand(0);
765     Info.offset = 0;
766     Info.align = Align(1);
767     Info.flags = MachineMemOperand::MOLoad;
768     return true;
769   }
770   default:
771     return false;
772   }
773 }
774 
775 //===----------------------------------------------------------------------===//
776 // WebAssembly Lowering private implementation.
777 //===----------------------------------------------------------------------===//
778 
779 //===----------------------------------------------------------------------===//
780 // Lowering Code
781 //===----------------------------------------------------------------------===//
782 
783 static void fail(const SDLoc &DL, SelectionDAG &DAG, const char *Msg) {
784   MachineFunction &MF = DAG.getMachineFunction();
785   DAG.getContext()->diagnose(
786       DiagnosticInfoUnsupported(MF.getFunction(), Msg, DL.getDebugLoc()));
787 }
788 
789 // Test whether the given calling convention is supported.
790 static bool callingConvSupported(CallingConv::ID CallConv) {
791   // We currently support the language-independent target-independent
792   // conventions. We don't yet have a way to annotate calls with properties like
793   // "cold", and we don't have any call-clobbered registers, so these are mostly
794   // all handled the same.
795   return CallConv == CallingConv::C || CallConv == CallingConv::Fast ||
796          CallConv == CallingConv::Cold ||
797          CallConv == CallingConv::PreserveMost ||
798          CallConv == CallingConv::PreserveAll ||
799          CallConv == CallingConv::CXX_FAST_TLS ||
800          CallConv == CallingConv::WASM_EmscriptenInvoke ||
801          CallConv == CallingConv::Swift;
802 }
803 
804 SDValue
805 WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
806                                      SmallVectorImpl<SDValue> &InVals) const {
807   SelectionDAG &DAG = CLI.DAG;
808   SDLoc DL = CLI.DL;
809   SDValue Chain = CLI.Chain;
810   SDValue Callee = CLI.Callee;
811   MachineFunction &MF = DAG.getMachineFunction();
812   auto Layout = MF.getDataLayout();
813 
814   CallingConv::ID CallConv = CLI.CallConv;
815   if (!callingConvSupported(CallConv))
816     fail(DL, DAG,
817          "WebAssembly doesn't support language-specific or target-specific "
818          "calling conventions yet");
819   if (CLI.IsPatchPoint)
820     fail(DL, DAG, "WebAssembly doesn't support patch point yet");
821 
822   if (CLI.IsTailCall) {
823     auto NoTail = [&](const char *Msg) {
824       if (CLI.CB && CLI.CB->isMustTailCall())
825         fail(DL, DAG, Msg);
826       CLI.IsTailCall = false;
827     };
828 
829     if (!Subtarget->hasTailCall())
830       NoTail("WebAssembly 'tail-call' feature not enabled");
831 
832     // Varargs calls cannot be tail calls because the buffer is on the stack
833     if (CLI.IsVarArg)
834       NoTail("WebAssembly does not support varargs tail calls");
835 
836     // Do not tail call unless caller and callee return types match
837     const Function &F = MF.getFunction();
838     const TargetMachine &TM = getTargetMachine();
839     Type *RetTy = F.getReturnType();
840     SmallVector<MVT, 4> CallerRetTys;
841     SmallVector<MVT, 4> CalleeRetTys;
842     computeLegalValueVTs(F, TM, RetTy, CallerRetTys);
843     computeLegalValueVTs(F, TM, CLI.RetTy, CalleeRetTys);
844     bool TypesMatch = CallerRetTys.size() == CalleeRetTys.size() &&
845                       std::equal(CallerRetTys.begin(), CallerRetTys.end(),
846                                  CalleeRetTys.begin());
847     if (!TypesMatch)
848       NoTail("WebAssembly tail call requires caller and callee return types to "
849              "match");
850 
851     // If pointers to local stack values are passed, we cannot tail call
852     if (CLI.CB) {
853       for (auto &Arg : CLI.CB->args()) {
854         Value *Val = Arg.get();
855         // Trace the value back through pointer operations
856         while (true) {
857           Value *Src = Val->stripPointerCastsAndAliases();
858           if (auto *GEP = dyn_cast<GetElementPtrInst>(Src))
859             Src = GEP->getPointerOperand();
860           if (Val == Src)
861             break;
862           Val = Src;
863         }
864         if (isa<AllocaInst>(Val)) {
865           NoTail(
866               "WebAssembly does not support tail calling with stack arguments");
867           break;
868         }
869       }
870     }
871   }
872 
873   SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
874   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
875   SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
876 
877   // The generic code may have added an sret argument. If we're lowering an
878   // invoke function, the ABI requires that the function pointer be the first
879   // argument, so we may have to swap the arguments.
880   if (CallConv == CallingConv::WASM_EmscriptenInvoke && Outs.size() >= 2 &&
881       Outs[0].Flags.isSRet()) {
882     std::swap(Outs[0], Outs[1]);
883     std::swap(OutVals[0], OutVals[1]);
884   }
885 
886   bool HasSwiftSelfArg = false;
887   bool HasSwiftErrorArg = false;
888   unsigned NumFixedArgs = 0;
889   for (unsigned I = 0; I < Outs.size(); ++I) {
890     const ISD::OutputArg &Out = Outs[I];
891     SDValue &OutVal = OutVals[I];
892     HasSwiftSelfArg |= Out.Flags.isSwiftSelf();
893     HasSwiftErrorArg |= Out.Flags.isSwiftError();
894     if (Out.Flags.isNest())
895       fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
896     if (Out.Flags.isInAlloca())
897       fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
898     if (Out.Flags.isInConsecutiveRegs())
899       fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
900     if (Out.Flags.isInConsecutiveRegsLast())
901       fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
902     if (Out.Flags.isByVal() && Out.Flags.getByValSize() != 0) {
903       auto &MFI = MF.getFrameInfo();
904       int FI = MFI.CreateStackObject(Out.Flags.getByValSize(),
905                                      Out.Flags.getNonZeroByValAlign(),
906                                      /*isSS=*/false);
907       SDValue SizeNode =
908           DAG.getConstant(Out.Flags.getByValSize(), DL, MVT::i32);
909       SDValue FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
910       Chain = DAG.getMemcpy(
911           Chain, DL, FINode, OutVal, SizeNode, Out.Flags.getNonZeroByValAlign(),
912           /*isVolatile*/ false, /*AlwaysInline=*/false,
913           /*isTailCall*/ false, MachinePointerInfo(), MachinePointerInfo());
914       OutVal = FINode;
915     }
916     // Count the number of fixed args *after* legalization.
917     NumFixedArgs += Out.IsFixed;
918   }
919 
920   bool IsVarArg = CLI.IsVarArg;
921   auto PtrVT = getPointerTy(Layout);
922 
923   // For swiftcc, emit additional swiftself and swifterror arguments
924   // if there aren't. These additional arguments are also added for callee
925   // signature They are necessary to match callee and caller signature for
926   // indirect call.
927   if (CallConv == CallingConv::Swift) {
928     if (!HasSwiftSelfArg) {
929       NumFixedArgs++;
930       ISD::OutputArg Arg;
931       Arg.Flags.setSwiftSelf();
932       CLI.Outs.push_back(Arg);
933       SDValue ArgVal = DAG.getUNDEF(PtrVT);
934       CLI.OutVals.push_back(ArgVal);
935     }
936     if (!HasSwiftErrorArg) {
937       NumFixedArgs++;
938       ISD::OutputArg Arg;
939       Arg.Flags.setSwiftError();
940       CLI.Outs.push_back(Arg);
941       SDValue ArgVal = DAG.getUNDEF(PtrVT);
942       CLI.OutVals.push_back(ArgVal);
943     }
944   }
945 
946   // Analyze operands of the call, assigning locations to each operand.
947   SmallVector<CCValAssign, 16> ArgLocs;
948   CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
949 
950   if (IsVarArg) {
951     // Outgoing non-fixed arguments are placed in a buffer. First
952     // compute their offsets and the total amount of buffer space needed.
953     for (unsigned I = NumFixedArgs; I < Outs.size(); ++I) {
954       const ISD::OutputArg &Out = Outs[I];
955       SDValue &Arg = OutVals[I];
956       EVT VT = Arg.getValueType();
957       assert(VT != MVT::iPTR && "Legalized args should be concrete");
958       Type *Ty = VT.getTypeForEVT(*DAG.getContext());
959       Align Alignment =
960           std::max(Out.Flags.getNonZeroOrigAlign(), Layout.getABITypeAlign(Ty));
961       unsigned Offset =
962           CCInfo.AllocateStack(Layout.getTypeAllocSize(Ty), Alignment);
963       CCInfo.addLoc(CCValAssign::getMem(ArgLocs.size(), VT.getSimpleVT(),
964                                         Offset, VT.getSimpleVT(),
965                                         CCValAssign::Full));
966     }
967   }
968 
969   unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
970 
971   SDValue FINode;
972   if (IsVarArg && NumBytes) {
973     // For non-fixed arguments, next emit stores to store the argument values
974     // to the stack buffer at the offsets computed above.
975     int FI = MF.getFrameInfo().CreateStackObject(NumBytes,
976                                                  Layout.getStackAlignment(),
977                                                  /*isSS=*/false);
978     unsigned ValNo = 0;
979     SmallVector<SDValue, 8> Chains;
980     for (SDValue Arg : drop_begin(OutVals, NumFixedArgs)) {
981       assert(ArgLocs[ValNo].getValNo() == ValNo &&
982              "ArgLocs should remain in order and only hold varargs args");
983       unsigned Offset = ArgLocs[ValNo++].getLocMemOffset();
984       FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
985       SDValue Add = DAG.getNode(ISD::ADD, DL, PtrVT, FINode,
986                                 DAG.getConstant(Offset, DL, PtrVT));
987       Chains.push_back(
988           DAG.getStore(Chain, DL, Arg, Add,
989                        MachinePointerInfo::getFixedStack(MF, FI, Offset)));
990     }
991     if (!Chains.empty())
992       Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
993   } else if (IsVarArg) {
994     FINode = DAG.getIntPtrConstant(0, DL);
995   }
996 
997   if (Callee->getOpcode() == ISD::GlobalAddress) {
998     // If the callee is a GlobalAddress node (quite common, every direct call
999     // is) turn it into a TargetGlobalAddress node so that LowerGlobalAddress
1000     // doesn't at MO_GOT which is not needed for direct calls.
1001     GlobalAddressSDNode* GA = cast<GlobalAddressSDNode>(Callee);
1002     Callee = DAG.getTargetGlobalAddress(GA->getGlobal(), DL,
1003                                         getPointerTy(DAG.getDataLayout()),
1004                                         GA->getOffset());
1005     Callee = DAG.getNode(WebAssemblyISD::Wrapper, DL,
1006                          getPointerTy(DAG.getDataLayout()), Callee);
1007   }
1008 
1009   // Compute the operands for the CALLn node.
1010   SmallVector<SDValue, 16> Ops;
1011   Ops.push_back(Chain);
1012   Ops.push_back(Callee);
1013 
1014   // Add all fixed arguments. Note that for non-varargs calls, NumFixedArgs
1015   // isn't reliable.
1016   Ops.append(OutVals.begin(),
1017              IsVarArg ? OutVals.begin() + NumFixedArgs : OutVals.end());
1018   // Add a pointer to the vararg buffer.
1019   if (IsVarArg)
1020     Ops.push_back(FINode);
1021 
1022   SmallVector<EVT, 8> InTys;
1023   for (const auto &In : Ins) {
1024     assert(!In.Flags.isByVal() && "byval is not valid for return values");
1025     assert(!In.Flags.isNest() && "nest is not valid for return values");
1026     if (In.Flags.isInAlloca())
1027       fail(DL, DAG, "WebAssembly hasn't implemented inalloca return values");
1028     if (In.Flags.isInConsecutiveRegs())
1029       fail(DL, DAG, "WebAssembly hasn't implemented cons regs return values");
1030     if (In.Flags.isInConsecutiveRegsLast())
1031       fail(DL, DAG,
1032            "WebAssembly hasn't implemented cons regs last return values");
1033     // Ignore In.getNonZeroOrigAlign() because all our arguments are passed in
1034     // registers.
1035     InTys.push_back(In.VT);
1036   }
1037 
1038   if (CLI.IsTailCall) {
1039     // ret_calls do not return values to the current frame
1040     SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
1041     return DAG.getNode(WebAssemblyISD::RET_CALL, DL, NodeTys, Ops);
1042   }
1043 
1044   InTys.push_back(MVT::Other);
1045   SDVTList InTyList = DAG.getVTList(InTys);
1046   SDValue Res = DAG.getNode(WebAssemblyISD::CALL, DL, InTyList, Ops);
1047 
1048   for (size_t I = 0; I < Ins.size(); ++I)
1049     InVals.push_back(Res.getValue(I));
1050 
1051   // Return the chain
1052   return Res.getValue(Ins.size());
1053 }
1054 
1055 bool WebAssemblyTargetLowering::CanLowerReturn(
1056     CallingConv::ID /*CallConv*/, MachineFunction & /*MF*/, bool /*IsVarArg*/,
1057     const SmallVectorImpl<ISD::OutputArg> &Outs,
1058     LLVMContext & /*Context*/) const {
1059   // WebAssembly can only handle returning tuples with multivalue enabled
1060   return Subtarget->hasMultivalue() || Outs.size() <= 1;
1061 }
1062 
1063 SDValue WebAssemblyTargetLowering::LowerReturn(
1064     SDValue Chain, CallingConv::ID CallConv, bool /*IsVarArg*/,
1065     const SmallVectorImpl<ISD::OutputArg> &Outs,
1066     const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
1067     SelectionDAG &DAG) const {
1068   assert((Subtarget->hasMultivalue() || Outs.size() <= 1) &&
1069          "MVP WebAssembly can only return up to one value");
1070   if (!callingConvSupported(CallConv))
1071     fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");
1072 
1073   SmallVector<SDValue, 4> RetOps(1, Chain);
1074   RetOps.append(OutVals.begin(), OutVals.end());
1075   Chain = DAG.getNode(WebAssemblyISD::RETURN, DL, MVT::Other, RetOps);
1076 
1077   // Record the number and types of the return values.
1078   for (const ISD::OutputArg &Out : Outs) {
1079     assert(!Out.Flags.isByVal() && "byval is not valid for return values");
1080     assert(!Out.Flags.isNest() && "nest is not valid for return values");
1081     assert(Out.IsFixed && "non-fixed return value is not valid");
1082     if (Out.Flags.isInAlloca())
1083       fail(DL, DAG, "WebAssembly hasn't implemented inalloca results");
1084     if (Out.Flags.isInConsecutiveRegs())
1085       fail(DL, DAG, "WebAssembly hasn't implemented cons regs results");
1086     if (Out.Flags.isInConsecutiveRegsLast())
1087       fail(DL, DAG, "WebAssembly hasn't implemented cons regs last results");
1088   }
1089 
1090   return Chain;
1091 }
1092 
1093 SDValue WebAssemblyTargetLowering::LowerFormalArguments(
1094     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1095     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1096     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1097   if (!callingConvSupported(CallConv))
1098     fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");
1099 
1100   MachineFunction &MF = DAG.getMachineFunction();
1101   auto *MFI = MF.getInfo<WebAssemblyFunctionInfo>();
1102 
1103   // Set up the incoming ARGUMENTS value, which serves to represent the liveness
1104   // of the incoming values before they're represented by virtual registers.
1105   MF.getRegInfo().addLiveIn(WebAssembly::ARGUMENTS);
1106 
1107   bool HasSwiftErrorArg = false;
1108   bool HasSwiftSelfArg = false;
1109   for (const ISD::InputArg &In : Ins) {
1110     HasSwiftSelfArg |= In.Flags.isSwiftSelf();
1111     HasSwiftErrorArg |= In.Flags.isSwiftError();
1112     if (In.Flags.isInAlloca())
1113       fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
1114     if (In.Flags.isNest())
1115       fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
1116     if (In.Flags.isInConsecutiveRegs())
1117       fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
1118     if (In.Flags.isInConsecutiveRegsLast())
1119       fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
1120     // Ignore In.getNonZeroOrigAlign() because all our arguments are passed in
1121     // registers.
1122     InVals.push_back(In.Used ? DAG.getNode(WebAssemblyISD::ARGUMENT, DL, In.VT,
1123                                            DAG.getTargetConstant(InVals.size(),
1124                                                                  DL, MVT::i32))
1125                              : DAG.getUNDEF(In.VT));
1126 
1127     // Record the number and types of arguments.
1128     MFI->addParam(In.VT);
1129   }
1130 
1131   // For swiftcc, emit additional swiftself and swifterror arguments
1132   // if there aren't. These additional arguments are also added for callee
1133   // signature They are necessary to match callee and caller signature for
1134   // indirect call.
1135   auto PtrVT = getPointerTy(MF.getDataLayout());
1136   if (CallConv == CallingConv::Swift) {
1137     if (!HasSwiftSelfArg) {
1138       MFI->addParam(PtrVT);
1139     }
1140     if (!HasSwiftErrorArg) {
1141       MFI->addParam(PtrVT);
1142     }
1143   }
1144   // Varargs are copied into a buffer allocated by the caller, and a pointer to
1145   // the buffer is passed as an argument.
1146   if (IsVarArg) {
1147     MVT PtrVT = getPointerTy(MF.getDataLayout());
1148     Register VarargVreg =
1149         MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrVT));
1150     MFI->setVarargBufferVreg(VarargVreg);
1151     Chain = DAG.getCopyToReg(
1152         Chain, DL, VarargVreg,
1153         DAG.getNode(WebAssemblyISD::ARGUMENT, DL, PtrVT,
1154                     DAG.getTargetConstant(Ins.size(), DL, MVT::i32)));
1155     MFI->addParam(PtrVT);
1156   }
1157 
1158   // Record the number and types of arguments and results.
1159   SmallVector<MVT, 4> Params;
1160   SmallVector<MVT, 4> Results;
1161   computeSignatureVTs(MF.getFunction().getFunctionType(), &MF.getFunction(),
1162                       MF.getFunction(), DAG.getTarget(), Params, Results);
1163   for (MVT VT : Results)
1164     MFI->addResult(VT);
1165   // TODO: Use signatures in WebAssemblyMachineFunctionInfo too and unify
1166   // the param logic here with ComputeSignatureVTs
1167   assert(MFI->getParams().size() == Params.size() &&
1168          std::equal(MFI->getParams().begin(), MFI->getParams().end(),
1169                     Params.begin()));
1170 
1171   return Chain;
1172 }
1173 
1174 void WebAssemblyTargetLowering::ReplaceNodeResults(
1175     SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
1176   switch (N->getOpcode()) {
1177   case ISD::SIGN_EXTEND_INREG:
1178     // Do not add any results, signifying that N should not be custom lowered
1179     // after all. This happens because simd128 turns on custom lowering for
1180     // SIGN_EXTEND_INREG, but for non-vector sign extends the result might be an
1181     // illegal type.
1182     break;
1183   default:
1184     llvm_unreachable(
1185         "ReplaceNodeResults not implemented for this op for WebAssembly!");
1186   }
1187 }
1188 
1189 //===----------------------------------------------------------------------===//
1190 //  Custom lowering hooks.
1191 //===----------------------------------------------------------------------===//
1192 
1193 SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op,
1194                                                   SelectionDAG &DAG) const {
1195   SDLoc DL(Op);
1196   switch (Op.getOpcode()) {
1197   default:
1198     llvm_unreachable("unimplemented operation lowering");
1199     return SDValue();
1200   case ISD::FrameIndex:
1201     return LowerFrameIndex(Op, DAG);
1202   case ISD::GlobalAddress:
1203     return LowerGlobalAddress(Op, DAG);
1204   case ISD::GlobalTLSAddress:
1205     return LowerGlobalTLSAddress(Op, DAG);
1206   case ISD::ExternalSymbol:
1207     return LowerExternalSymbol(Op, DAG);
1208   case ISD::JumpTable:
1209     return LowerJumpTable(Op, DAG);
1210   case ISD::BR_JT:
1211     return LowerBR_JT(Op, DAG);
1212   case ISD::VASTART:
1213     return LowerVASTART(Op, DAG);
1214   case ISD::BlockAddress:
1215   case ISD::BRIND:
1216     fail(DL, DAG, "WebAssembly hasn't implemented computed gotos");
1217     return SDValue();
1218   case ISD::RETURNADDR:
1219     return LowerRETURNADDR(Op, DAG);
1220   case ISD::FRAMEADDR:
1221     return LowerFRAMEADDR(Op, DAG);
1222   case ISD::CopyToReg:
1223     return LowerCopyToReg(Op, DAG);
1224   case ISD::EXTRACT_VECTOR_ELT:
1225   case ISD::INSERT_VECTOR_ELT:
1226     return LowerAccessVectorElement(Op, DAG);
1227   case ISD::INTRINSIC_VOID:
1228   case ISD::INTRINSIC_WO_CHAIN:
1229   case ISD::INTRINSIC_W_CHAIN:
1230     return LowerIntrinsic(Op, DAG);
1231   case ISD::SIGN_EXTEND_INREG:
1232     return LowerSIGN_EXTEND_INREG(Op, DAG);
1233   case ISD::BUILD_VECTOR:
1234     return LowerBUILD_VECTOR(Op, DAG);
1235   case ISD::VECTOR_SHUFFLE:
1236     return LowerVECTOR_SHUFFLE(Op, DAG);
1237   case ISD::SETCC:
1238     return LowerSETCC(Op, DAG);
1239   case ISD::SHL:
1240   case ISD::SRA:
1241   case ISD::SRL:
1242     return LowerShift(Op, DAG);
1243   }
1244 }
1245 
1246 SDValue WebAssemblyTargetLowering::LowerCopyToReg(SDValue Op,
1247                                                   SelectionDAG &DAG) const {
1248   SDValue Src = Op.getOperand(2);
1249   if (isa<FrameIndexSDNode>(Src.getNode())) {
1250     // CopyToReg nodes don't support FrameIndex operands. Other targets select
1251     // the FI to some LEA-like instruction, but since we don't have that, we
1252     // need to insert some kind of instruction that can take an FI operand and
1253     // produces a value usable by CopyToReg (i.e. in a vreg). So insert a dummy
1254     // local.copy between Op and its FI operand.
1255     SDValue Chain = Op.getOperand(0);
1256     SDLoc DL(Op);
1257     unsigned Reg = cast<RegisterSDNode>(Op.getOperand(1))->getReg();
1258     EVT VT = Src.getValueType();
1259     SDValue Copy(DAG.getMachineNode(VT == MVT::i32 ? WebAssembly::COPY_I32
1260                                                    : WebAssembly::COPY_I64,
1261                                     DL, VT, Src),
1262                  0);
1263     return Op.getNode()->getNumValues() == 1
1264                ? DAG.getCopyToReg(Chain, DL, Reg, Copy)
1265                : DAG.getCopyToReg(Chain, DL, Reg, Copy,
1266                                   Op.getNumOperands() == 4 ? Op.getOperand(3)
1267                                                            : SDValue());
1268   }
1269   return SDValue();
1270 }
1271 
1272 SDValue WebAssemblyTargetLowering::LowerFrameIndex(SDValue Op,
1273                                                    SelectionDAG &DAG) const {
1274   int FI = cast<FrameIndexSDNode>(Op)->getIndex();
1275   return DAG.getTargetFrameIndex(FI, Op.getValueType());
1276 }
1277 
1278 SDValue WebAssemblyTargetLowering::LowerRETURNADDR(SDValue Op,
1279                                                    SelectionDAG &DAG) const {
1280   SDLoc DL(Op);
1281 
1282   if (!Subtarget->getTargetTriple().isOSEmscripten()) {
1283     fail(DL, DAG,
1284          "Non-Emscripten WebAssembly hasn't implemented "
1285          "__builtin_return_address");
1286     return SDValue();
1287   }
1288 
1289   if (verifyReturnAddressArgumentIsConstant(Op, DAG))
1290     return SDValue();
1291 
1292   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1293   MakeLibCallOptions CallOptions;
1294   return makeLibCall(DAG, RTLIB::RETURN_ADDRESS, Op.getValueType(),
1295                      {DAG.getConstant(Depth, DL, MVT::i32)}, CallOptions, DL)
1296       .first;
1297 }
1298 
1299 SDValue WebAssemblyTargetLowering::LowerFRAMEADDR(SDValue Op,
1300                                                   SelectionDAG &DAG) const {
1301   // Non-zero depths are not supported by WebAssembly currently. Use the
1302   // legalizer's default expansion, which is to return 0 (what this function is
1303   // documented to do).
1304   if (Op.getConstantOperandVal(0) > 0)
1305     return SDValue();
1306 
1307   DAG.getMachineFunction().getFrameInfo().setFrameAddressIsTaken(true);
1308   EVT VT = Op.getValueType();
1309   Register FP =
1310       Subtarget->getRegisterInfo()->getFrameRegister(DAG.getMachineFunction());
1311   return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), FP, VT);
1312 }
1313 
1314 SDValue
1315 WebAssemblyTargetLowering::LowerGlobalTLSAddress(SDValue Op,
1316                                                  SelectionDAG &DAG) const {
1317   SDLoc DL(Op);
1318   const auto *GA = cast<GlobalAddressSDNode>(Op);
1319   MVT PtrVT = getPointerTy(DAG.getDataLayout());
1320 
1321   MachineFunction &MF = DAG.getMachineFunction();
1322   if (!MF.getSubtarget<WebAssemblySubtarget>().hasBulkMemory())
1323     report_fatal_error("cannot use thread-local storage without bulk memory",
1324                        false);
1325 
1326   const GlobalValue *GV = GA->getGlobal();
1327 
1328   // Currently Emscripten does not support dynamic linking with threads.
1329   // Therefore, if we have thread-local storage, only the local-exec model
1330   // is possible.
1331   // TODO: remove this and implement proper TLS models once Emscripten
1332   // supports dynamic linking with threads.
1333   if (GV->getThreadLocalMode() != GlobalValue::LocalExecTLSModel &&
1334       !Subtarget->getTargetTriple().isOSEmscripten()) {
1335     report_fatal_error("only -ftls-model=local-exec is supported for now on "
1336                        "non-Emscripten OSes: variable " +
1337                            GV->getName(),
1338                        false);
1339   }
1340 
1341   auto GlobalGet = PtrVT == MVT::i64 ? WebAssembly::GLOBAL_GET_I64
1342                                      : WebAssembly::GLOBAL_GET_I32;
1343   const char *BaseName = MF.createExternalSymbolName("__tls_base");
1344 
1345   SDValue BaseAddr(
1346       DAG.getMachineNode(GlobalGet, DL, PtrVT,
1347                          DAG.getTargetExternalSymbol(BaseName, PtrVT)),
1348       0);
1349 
1350   SDValue TLSOffset = DAG.getTargetGlobalAddress(
1351       GV, DL, PtrVT, GA->getOffset(), WebAssemblyII::MO_TLS_BASE_REL);
1352   SDValue SymAddr = DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT, TLSOffset);
1353 
1354   return DAG.getNode(ISD::ADD, DL, PtrVT, BaseAddr, SymAddr);
1355 }
1356 
1357 SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op,
1358                                                       SelectionDAG &DAG) const {
1359   SDLoc DL(Op);
1360   const auto *GA = cast<GlobalAddressSDNode>(Op);
1361   EVT VT = Op.getValueType();
1362   assert(GA->getTargetFlags() == 0 &&
1363          "Unexpected target flags on generic GlobalAddressSDNode");
1364   if (GA->getAddressSpace() != 0)
1365     fail(DL, DAG, "WebAssembly only expects the 0 address space");
1366 
1367   unsigned OperandFlags = 0;
1368   if (isPositionIndependent()) {
1369     const GlobalValue *GV = GA->getGlobal();
1370     if (getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV)) {
1371       MachineFunction &MF = DAG.getMachineFunction();
1372       MVT PtrVT = getPointerTy(MF.getDataLayout());
1373       const char *BaseName;
1374       if (GV->getValueType()->isFunctionTy()) {
1375         BaseName = MF.createExternalSymbolName("__table_base");
1376         OperandFlags = WebAssemblyII::MO_TABLE_BASE_REL;
1377       }
1378       else {
1379         BaseName = MF.createExternalSymbolName("__memory_base");
1380         OperandFlags = WebAssemblyII::MO_MEMORY_BASE_REL;
1381       }
1382       SDValue BaseAddr =
1383           DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
1384                       DAG.getTargetExternalSymbol(BaseName, PtrVT));
1385 
1386       SDValue SymAddr = DAG.getNode(
1387           WebAssemblyISD::WrapperPIC, DL, VT,
1388           DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, GA->getOffset(),
1389                                      OperandFlags));
1390 
1391       return DAG.getNode(ISD::ADD, DL, VT, BaseAddr, SymAddr);
1392     } else {
1393       OperandFlags = WebAssemblyII::MO_GOT;
1394     }
1395   }
1396 
1397   return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
1398                      DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT,
1399                                                 GA->getOffset(), OperandFlags));
1400 }
1401 
1402 SDValue
1403 WebAssemblyTargetLowering::LowerExternalSymbol(SDValue Op,
1404                                                SelectionDAG &DAG) const {
1405   SDLoc DL(Op);
1406   const auto *ES = cast<ExternalSymbolSDNode>(Op);
1407   EVT VT = Op.getValueType();
1408   assert(ES->getTargetFlags() == 0 &&
1409          "Unexpected target flags on generic ExternalSymbolSDNode");
1410   return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
1411                      DAG.getTargetExternalSymbol(ES->getSymbol(), VT));
1412 }
1413 
1414 SDValue WebAssemblyTargetLowering::LowerJumpTable(SDValue Op,
1415                                                   SelectionDAG &DAG) const {
1416   // There's no need for a Wrapper node because we always incorporate a jump
1417   // table operand into a BR_TABLE instruction, rather than ever
1418   // materializing it in a register.
1419   const JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
1420   return DAG.getTargetJumpTable(JT->getIndex(), Op.getValueType(),
1421                                 JT->getTargetFlags());
1422 }
1423 
1424 SDValue WebAssemblyTargetLowering::LowerBR_JT(SDValue Op,
1425                                               SelectionDAG &DAG) const {
1426   SDLoc DL(Op);
1427   SDValue Chain = Op.getOperand(0);
1428   const auto *JT = cast<JumpTableSDNode>(Op.getOperand(1));
1429   SDValue Index = Op.getOperand(2);
1430   assert(JT->getTargetFlags() == 0 && "WebAssembly doesn't set target flags");
1431 
1432   SmallVector<SDValue, 8> Ops;
1433   Ops.push_back(Chain);
1434   Ops.push_back(Index);
1435 
1436   MachineJumpTableInfo *MJTI = DAG.getMachineFunction().getJumpTableInfo();
1437   const auto &MBBs = MJTI->getJumpTables()[JT->getIndex()].MBBs;
1438 
1439   // Add an operand for each case.
1440   for (auto MBB : MBBs)
1441     Ops.push_back(DAG.getBasicBlock(MBB));
1442 
1443   // Add the first MBB as a dummy default target for now. This will be replaced
1444   // with the proper default target (and the preceding range check eliminated)
1445   // if possible by WebAssemblyFixBrTableDefaults.
1446   Ops.push_back(DAG.getBasicBlock(*MBBs.begin()));
1447   return DAG.getNode(WebAssemblyISD::BR_TABLE, DL, MVT::Other, Ops);
1448 }
1449 
1450 SDValue WebAssemblyTargetLowering::LowerVASTART(SDValue Op,
1451                                                 SelectionDAG &DAG) const {
1452   SDLoc DL(Op);
1453   EVT PtrVT = getPointerTy(DAG.getMachineFunction().getDataLayout());
1454 
1455   auto *MFI = DAG.getMachineFunction().getInfo<WebAssemblyFunctionInfo>();
1456   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
1457 
1458   SDValue ArgN = DAG.getCopyFromReg(DAG.getEntryNode(), DL,
1459                                     MFI->getVarargBufferVreg(), PtrVT);
1460   return DAG.getStore(Op.getOperand(0), DL, ArgN, Op.getOperand(1),
1461                       MachinePointerInfo(SV));
1462 }
1463 
1464 static SDValue getCppExceptionSymNode(SDValue Op, unsigned TagIndex,
1465                                       SelectionDAG &DAG) {
1466   // We only support C++ exceptions for now
1467   int Tag =
1468       cast<ConstantSDNode>(Op.getOperand(TagIndex).getNode())->getZExtValue();
1469   if (Tag != WebAssembly::CPP_EXCEPTION)
1470     llvm_unreachable("Invalid tag: We only support C++ exceptions for now");
1471   auto &MF = DAG.getMachineFunction();
1472   const auto &TLI = DAG.getTargetLoweringInfo();
1473   MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
1474   const char *SymName = MF.createExternalSymbolName("__cpp_exception");
1475   return DAG.getNode(WebAssemblyISD::Wrapper, SDLoc(Op), PtrVT,
1476                      DAG.getTargetExternalSymbol(SymName, PtrVT));
1477 }
1478 
1479 SDValue WebAssemblyTargetLowering::LowerIntrinsic(SDValue Op,
1480                                                   SelectionDAG &DAG) const {
1481   MachineFunction &MF = DAG.getMachineFunction();
1482   unsigned IntNo;
1483   switch (Op.getOpcode()) {
1484   case ISD::INTRINSIC_VOID:
1485   case ISD::INTRINSIC_W_CHAIN:
1486     IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
1487     break;
1488   case ISD::INTRINSIC_WO_CHAIN:
1489     IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1490     break;
1491   default:
1492     llvm_unreachable("Invalid intrinsic");
1493   }
1494   SDLoc DL(Op);
1495 
1496   switch (IntNo) {
1497   default:
1498     return SDValue(); // Don't custom lower most intrinsics.
1499 
1500   case Intrinsic::wasm_lsda: {
1501     EVT VT = Op.getValueType();
1502     const TargetLowering &TLI = DAG.getTargetLoweringInfo();
1503     MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
1504     auto &Context = MF.getMMI().getContext();
1505     MCSymbol *S = Context.getOrCreateSymbol(Twine("GCC_except_table") +
1506                                             Twine(MF.getFunctionNumber()));
1507     return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
1508                        DAG.getMCSymbol(S, PtrVT));
1509   }
1510 
1511   case Intrinsic::wasm_throw: {
1512     SDValue SymNode = getCppExceptionSymNode(Op, 2, DAG);
1513     return DAG.getNode(WebAssemblyISD::THROW, DL,
1514                        MVT::Other, // outchain type
1515                        {
1516                            Op.getOperand(0), // inchain
1517                            SymNode,          // exception symbol
1518                            Op.getOperand(3)  // thrown value
1519                        });
1520   }
1521 
1522   case Intrinsic::wasm_catch: {
1523     SDValue SymNode = getCppExceptionSymNode(Op, 2, DAG);
1524     return DAG.getNode(WebAssemblyISD::CATCH, DL,
1525                        {
1526                            MVT::i32,  // outchain type
1527                            MVT::Other // return value
1528                        },
1529                        {
1530                            Op.getOperand(0), // inchain
1531                            SymNode           // exception symbol
1532                        });
1533   }
1534 
1535   case Intrinsic::wasm_shuffle: {
1536     // Drop in-chain and replace undefs, but otherwise pass through unchanged
1537     SDValue Ops[18];
1538     size_t OpIdx = 0;
1539     Ops[OpIdx++] = Op.getOperand(1);
1540     Ops[OpIdx++] = Op.getOperand(2);
1541     while (OpIdx < 18) {
1542       const SDValue &MaskIdx = Op.getOperand(OpIdx + 1);
1543       if (MaskIdx.isUndef() ||
1544           cast<ConstantSDNode>(MaskIdx.getNode())->getZExtValue() >= 32) {
1545         Ops[OpIdx++] = DAG.getConstant(0, DL, MVT::i32);
1546       } else {
1547         Ops[OpIdx++] = MaskIdx;
1548       }
1549     }
1550     return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops);
1551   }
1552   }
1553 }
1554 
1555 SDValue
1556 WebAssemblyTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
1557                                                   SelectionDAG &DAG) const {
1558   SDLoc DL(Op);
1559   // If sign extension operations are disabled, allow sext_inreg only if operand
1560   // is a vector extract of an i8 or i16 lane. SIMD does not depend on sign
1561   // extension operations, but allowing sext_inreg in this context lets us have
1562   // simple patterns to select extract_lane_s instructions. Expanding sext_inreg
1563   // everywhere would be simpler in this file, but would necessitate large and
1564   // brittle patterns to undo the expansion and select extract_lane_s
1565   // instructions.
1566   assert(!Subtarget->hasSignExt() && Subtarget->hasSIMD128());
1567   if (Op.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT)
1568     return SDValue();
1569 
1570   const SDValue &Extract = Op.getOperand(0);
1571   MVT VecT = Extract.getOperand(0).getSimpleValueType();
1572   if (VecT.getVectorElementType().getSizeInBits() > 32)
1573     return SDValue();
1574   MVT ExtractedLaneT =
1575       cast<VTSDNode>(Op.getOperand(1).getNode())->getVT().getSimpleVT();
1576   MVT ExtractedVecT =
1577       MVT::getVectorVT(ExtractedLaneT, 128 / ExtractedLaneT.getSizeInBits());
1578   if (ExtractedVecT == VecT)
1579     return Op;
1580 
1581   // Bitcast vector to appropriate type to ensure ISel pattern coverage
1582   const SDNode *Index = Extract.getOperand(1).getNode();
1583   if (!isa<ConstantSDNode>(Index))
1584     return SDValue();
1585   unsigned IndexVal = cast<ConstantSDNode>(Index)->getZExtValue();
1586   unsigned Scale =
1587       ExtractedVecT.getVectorNumElements() / VecT.getVectorNumElements();
1588   assert(Scale > 1);
1589   SDValue NewIndex =
1590       DAG.getConstant(IndexVal * Scale, DL, Index->getValueType(0));
1591   SDValue NewExtract = DAG.getNode(
1592       ISD::EXTRACT_VECTOR_ELT, DL, Extract.getValueType(),
1593       DAG.getBitcast(ExtractedVecT, Extract.getOperand(0)), NewIndex);
1594   return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Op.getValueType(), NewExtract,
1595                      Op.getOperand(1));
1596 }
1597 
1598 SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
1599                                                      SelectionDAG &DAG) const {
1600   SDLoc DL(Op);
1601   const EVT VecT = Op.getValueType();
1602   const EVT LaneT = Op.getOperand(0).getValueType();
1603   const size_t Lanes = Op.getNumOperands();
1604   bool CanSwizzle = VecT == MVT::v16i8;
1605 
1606   // BUILD_VECTORs are lowered to the instruction that initializes the highest
1607   // possible number of lanes at once followed by a sequence of replace_lane
1608   // instructions to individually initialize any remaining lanes.
1609 
1610   // TODO: Tune this. For example, lanewise swizzling is very expensive, so
1611   // swizzled lanes should be given greater weight.
1612 
1613   // TODO: Investigate building vectors by shuffling together vectors built by
1614   // separately specialized means.
1615 
1616   auto IsConstant = [](const SDValue &V) {
1617     return V.getOpcode() == ISD::Constant || V.getOpcode() == ISD::ConstantFP;
1618   };
1619 
1620   // Returns the source vector and index vector pair if they exist. Checks for:
1621   //   (extract_vector_elt
1622   //     $src,
1623   //     (sign_extend_inreg (extract_vector_elt $indices, $i))
1624   //   )
1625   auto GetSwizzleSrcs = [](size_t I, const SDValue &Lane) {
1626     auto Bail = std::make_pair(SDValue(), SDValue());
1627     if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
1628       return Bail;
1629     const SDValue &SwizzleSrc = Lane->getOperand(0);
1630     const SDValue &IndexExt = Lane->getOperand(1);
1631     if (IndexExt->getOpcode() != ISD::SIGN_EXTEND_INREG)
1632       return Bail;
1633     const SDValue &Index = IndexExt->getOperand(0);
1634     if (Index->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
1635       return Bail;
1636     const SDValue &SwizzleIndices = Index->getOperand(0);
1637     if (SwizzleSrc.getValueType() != MVT::v16i8 ||
1638         SwizzleIndices.getValueType() != MVT::v16i8 ||
1639         Index->getOperand(1)->getOpcode() != ISD::Constant ||
1640         Index->getConstantOperandVal(1) != I)
1641       return Bail;
1642     return std::make_pair(SwizzleSrc, SwizzleIndices);
1643   };
1644 
1645   using ValueEntry = std::pair<SDValue, size_t>;
1646   SmallVector<ValueEntry, 16> SplatValueCounts;
1647 
1648   using SwizzleEntry = std::pair<std::pair<SDValue, SDValue>, size_t>;
1649   SmallVector<SwizzleEntry, 16> SwizzleCounts;
1650 
1651   auto AddCount = [](auto &Counts, const auto &Val) {
1652     auto CountIt =
1653         llvm::find_if(Counts, [&Val](auto E) { return E.first == Val; });
1654     if (CountIt == Counts.end()) {
1655       Counts.emplace_back(Val, 1);
1656     } else {
1657       CountIt->second++;
1658     }
1659   };
1660 
1661   auto GetMostCommon = [](auto &Counts) {
1662     auto CommonIt =
1663         std::max_element(Counts.begin(), Counts.end(),
1664                          [](auto A, auto B) { return A.second < B.second; });
1665     assert(CommonIt != Counts.end() && "Unexpected all-undef build_vector");
1666     return *CommonIt;
1667   };
1668 
1669   size_t NumConstantLanes = 0;
1670 
1671   // Count eligible lanes for each type of vector creation op
1672   for (size_t I = 0; I < Lanes; ++I) {
1673     const SDValue &Lane = Op->getOperand(I);
1674     if (Lane.isUndef())
1675       continue;
1676 
1677     AddCount(SplatValueCounts, Lane);
1678 
1679     if (IsConstant(Lane)) {
1680       NumConstantLanes++;
1681     } else if (CanSwizzle) {
1682       auto SwizzleSrcs = GetSwizzleSrcs(I, Lane);
1683       if (SwizzleSrcs.first)
1684         AddCount(SwizzleCounts, SwizzleSrcs);
1685     }
1686   }
1687 
1688   SDValue SplatValue;
1689   size_t NumSplatLanes;
1690   std::tie(SplatValue, NumSplatLanes) = GetMostCommon(SplatValueCounts);
1691 
1692   SDValue SwizzleSrc;
1693   SDValue SwizzleIndices;
1694   size_t NumSwizzleLanes = 0;
1695   if (SwizzleCounts.size())
1696     std::forward_as_tuple(std::tie(SwizzleSrc, SwizzleIndices),
1697                           NumSwizzleLanes) = GetMostCommon(SwizzleCounts);
1698 
1699   // Predicate returning true if the lane is properly initialized by the
1700   // original instruction
1701   std::function<bool(size_t, const SDValue &)> IsLaneConstructed;
1702   SDValue Result;
1703   // Prefer swizzles over vector consts over splats
1704   if (NumSwizzleLanes >= NumSplatLanes &&
1705       (!Subtarget->hasUnimplementedSIMD128() ||
1706        NumSwizzleLanes >= NumConstantLanes)) {
1707     Result = DAG.getNode(WebAssemblyISD::SWIZZLE, DL, VecT, SwizzleSrc,
1708                          SwizzleIndices);
1709     auto Swizzled = std::make_pair(SwizzleSrc, SwizzleIndices);
1710     IsLaneConstructed = [&, Swizzled](size_t I, const SDValue &Lane) {
1711       return Swizzled == GetSwizzleSrcs(I, Lane);
1712     };
1713   } else if (NumConstantLanes >= NumSplatLanes &&
1714              Subtarget->hasUnimplementedSIMD128()) {
1715     // If we support v128.const, emit it directly
1716     SmallVector<SDValue, 16> ConstLanes;
1717     for (const SDValue &Lane : Op->op_values()) {
1718       if (IsConstant(Lane)) {
1719         ConstLanes.push_back(Lane);
1720       } else if (LaneT.isFloatingPoint()) {
1721         ConstLanes.push_back(DAG.getConstantFP(0, DL, LaneT));
1722       } else {
1723         ConstLanes.push_back(DAG.getConstant(0, DL, LaneT));
1724       }
1725     }
1726     Result = DAG.getBuildVector(VecT, DL, ConstLanes);
1727     IsLaneConstructed = [&IsConstant](size_t _, const SDValue &Lane) {
1728       return IsConstant(Lane);
1729     };
1730   } else if (NumConstantLanes >= NumSplatLanes && VecT.isInteger()) {
1731     // Otherwise, if this is an integer vector, pack the lane values together so
1732     // we can construct the 128-bit constant from a pair of i64s using a splat
1733     // followed by at most one i64x2.replace_lane. Also keep track of the lanes
1734     // that actually matter so we can avoid the replace_lane in more cases.
1735     std::array<uint64_t, 2> I64s{{0, 0}};
1736     std::array<uint64_t, 2> ConstLaneMasks{{0, 0}};
1737     size_t LaneBits = 128 / Lanes;
1738     size_t HalfLanes = Lanes / 2;
1739     for (size_t I = 0; I < Lanes; ++I) {
1740       const SDValue &Lane = Op.getOperand(I);
1741       if (IsConstant(Lane)) {
1742         // How much we need to shift Val to position it in an i64
1743         auto Shift = LaneBits * (I % HalfLanes);
1744         auto Mask = maskTrailingOnes<uint64_t>(LaneBits);
1745         auto Val = cast<ConstantSDNode>(Lane.getNode())->getZExtValue() & Mask;
1746         I64s[I / HalfLanes] |= Val << Shift;
1747         ConstLaneMasks[I / HalfLanes] |= Mask << Shift;
1748       }
1749     }
1750     // Check whether all constant lanes in the second half of the vector are
1751     // equivalent in the first half or vice versa to determine whether splatting
1752     // either side will be sufficient to materialize the constant. As a special
1753     // case, if the first and second halves have no constant lanes in common, we
1754     // can just combine them.
1755     bool FirstHalfSufficient = (I64s[0] & ConstLaneMasks[1]) == I64s[1];
1756     bool SecondHalfSufficient = (I64s[1] & ConstLaneMasks[0]) == I64s[0];
1757     bool CombinedSufficient = (ConstLaneMasks[0] & ConstLaneMasks[1]) == 0;
1758 
1759     uint64_t Splatted;
1760     if (SecondHalfSufficient) {
1761       Splatted = I64s[1];
1762     } else if (CombinedSufficient) {
1763       Splatted = I64s[0] | I64s[1];
1764     } else {
1765       Splatted = I64s[0];
1766     }
1767 
1768     Result = DAG.getSplatBuildVector(MVT::v2i64, DL,
1769                                      DAG.getConstant(Splatted, DL, MVT::i64));
1770     if (!FirstHalfSufficient && !SecondHalfSufficient && !CombinedSufficient) {
1771       Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v2i64, Result,
1772                            DAG.getConstant(I64s[1], DL, MVT::i64),
1773                            DAG.getConstant(1, DL, MVT::i32));
1774     }
1775     Result = DAG.getBitcast(VecT, Result);
1776     IsLaneConstructed = [&IsConstant](size_t _, const SDValue &Lane) {
1777       return IsConstant(Lane);
1778     };
1779   } else {
1780     // Use a splat, but possibly a load_splat
1781     LoadSDNode *SplattedLoad;
1782     if ((SplattedLoad = dyn_cast<LoadSDNode>(SplatValue)) &&
1783         SplattedLoad->getMemoryVT() == VecT.getVectorElementType()) {
1784       Result = DAG.getMemIntrinsicNode(
1785           WebAssemblyISD::LOAD_SPLAT, DL, DAG.getVTList(VecT),
1786           {SplattedLoad->getChain(), SplattedLoad->getBasePtr(),
1787            SplattedLoad->getOffset()},
1788           SplattedLoad->getMemoryVT(), SplattedLoad->getMemOperand());
1789     } else {
1790       Result = DAG.getSplatBuildVector(VecT, DL, SplatValue);
1791     }
1792     IsLaneConstructed = [&SplatValue](size_t _, const SDValue &Lane) {
1793       return Lane == SplatValue;
1794     };
1795   }
1796 
1797   assert(Result);
1798   assert(IsLaneConstructed);
1799 
1800   // Add replace_lane instructions for any unhandled values
1801   for (size_t I = 0; I < Lanes; ++I) {
1802     const SDValue &Lane = Op->getOperand(I);
1803     if (!Lane.isUndef() && !IsLaneConstructed(I, Lane))
1804       Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecT, Result, Lane,
1805                            DAG.getConstant(I, DL, MVT::i32));
1806   }
1807 
1808   return Result;
1809 }
1810 
1811 SDValue
1812 WebAssemblyTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
1813                                                SelectionDAG &DAG) const {
1814   SDLoc DL(Op);
1815   ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op.getNode())->getMask();
1816   MVT VecType = Op.getOperand(0).getSimpleValueType();
1817   assert(VecType.is128BitVector() && "Unexpected shuffle vector type");
1818   size_t LaneBytes = VecType.getVectorElementType().getSizeInBits() / 8;
1819 
1820   // Space for two vector args and sixteen mask indices
1821   SDValue Ops[18];
1822   size_t OpIdx = 0;
1823   Ops[OpIdx++] = Op.getOperand(0);
1824   Ops[OpIdx++] = Op.getOperand(1);
1825 
1826   // Expand mask indices to byte indices and materialize them as operands
1827   for (int M : Mask) {
1828     for (size_t J = 0; J < LaneBytes; ++J) {
1829       // Lower undefs (represented by -1 in mask) to zero
1830       uint64_t ByteIndex = M == -1 ? 0 : (uint64_t)M * LaneBytes + J;
1831       Ops[OpIdx++] = DAG.getConstant(ByteIndex, DL, MVT::i32);
1832     }
1833   }
1834 
1835   return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops);
1836 }
1837 
1838 SDValue WebAssemblyTargetLowering::LowerSETCC(SDValue Op,
1839                                               SelectionDAG &DAG) const {
1840   SDLoc DL(Op);
1841   // The legalizer does not know how to expand the comparison modes of i64x2
1842   // vectors because no comparison modes are supported. We could solve this by
1843   // expanding all i64x2 SETCC nodes, but that seems to expand f64x2 SETCC nodes
1844   // (which return i64x2 results) as well. So instead we manually unroll i64x2
1845   // comparisons here.
1846   assert(Op->getOperand(0)->getSimpleValueType(0) == MVT::v2i64);
1847   SmallVector<SDValue, 2> LHS, RHS;
1848   DAG.ExtractVectorElements(Op->getOperand(0), LHS);
1849   DAG.ExtractVectorElements(Op->getOperand(1), RHS);
1850   const SDValue &CC = Op->getOperand(2);
1851   auto MakeLane = [&](unsigned I) {
1852     return DAG.getNode(ISD::SELECT_CC, DL, MVT::i64, LHS[I], RHS[I],
1853                        DAG.getConstant(uint64_t(-1), DL, MVT::i64),
1854                        DAG.getConstant(uint64_t(0), DL, MVT::i64), CC);
1855   };
1856   return DAG.getBuildVector(Op->getValueType(0), DL,
1857                             {MakeLane(0), MakeLane(1)});
1858 }
1859 
1860 SDValue
1861 WebAssemblyTargetLowering::LowerAccessVectorElement(SDValue Op,
1862                                                     SelectionDAG &DAG) const {
1863   // Allow constant lane indices, expand variable lane indices
1864   SDNode *IdxNode = Op.getOperand(Op.getNumOperands() - 1).getNode();
1865   if (isa<ConstantSDNode>(IdxNode) || IdxNode->isUndef())
1866     return Op;
1867   else
1868     // Perform default expansion
1869     return SDValue();
1870 }
1871 
1872 static SDValue unrollVectorShift(SDValue Op, SelectionDAG &DAG) {
1873   EVT LaneT = Op.getSimpleValueType().getVectorElementType();
1874   // 32-bit and 64-bit unrolled shifts will have proper semantics
1875   if (LaneT.bitsGE(MVT::i32))
1876     return DAG.UnrollVectorOp(Op.getNode());
1877   // Otherwise mask the shift value to get proper semantics from 32-bit shift
1878   SDLoc DL(Op);
1879   size_t NumLanes = Op.getSimpleValueType().getVectorNumElements();
1880   SDValue Mask = DAG.getConstant(LaneT.getSizeInBits() - 1, DL, MVT::i32);
1881   unsigned ShiftOpcode = Op.getOpcode();
1882   SmallVector<SDValue, 16> ShiftedElements;
1883   DAG.ExtractVectorElements(Op.getOperand(0), ShiftedElements, 0, 0, MVT::i32);
1884   SmallVector<SDValue, 16> ShiftElements;
1885   DAG.ExtractVectorElements(Op.getOperand(1), ShiftElements, 0, 0, MVT::i32);
1886   SmallVector<SDValue, 16> UnrolledOps;
1887   for (size_t i = 0; i < NumLanes; ++i) {
1888     SDValue MaskedShiftValue =
1889         DAG.getNode(ISD::AND, DL, MVT::i32, ShiftElements[i], Mask);
1890     SDValue ShiftedValue = ShiftedElements[i];
1891     if (ShiftOpcode == ISD::SRA)
1892       ShiftedValue = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32,
1893                                  ShiftedValue, DAG.getValueType(LaneT));
1894     UnrolledOps.push_back(
1895         DAG.getNode(ShiftOpcode, DL, MVT::i32, ShiftedValue, MaskedShiftValue));
1896   }
1897   return DAG.getBuildVector(Op.getValueType(), DL, UnrolledOps);
1898 }
1899 
1900 SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op,
1901                                               SelectionDAG &DAG) const {
1902   SDLoc DL(Op);
1903 
1904   // Only manually lower vector shifts
1905   assert(Op.getSimpleValueType().isVector());
1906 
1907   auto ShiftVal = DAG.getSplatValue(Op.getOperand(1));
1908   if (!ShiftVal)
1909     return unrollVectorShift(Op, DAG);
1910 
1911   // Use anyext because none of the high bits can affect the shift
1912   ShiftVal = DAG.getAnyExtOrTrunc(ShiftVal, DL, MVT::i32);
1913 
1914   unsigned Opcode;
1915   switch (Op.getOpcode()) {
1916   case ISD::SHL:
1917     Opcode = WebAssemblyISD::VEC_SHL;
1918     break;
1919   case ISD::SRA:
1920     Opcode = WebAssemblyISD::VEC_SHR_S;
1921     break;
1922   case ISD::SRL:
1923     Opcode = WebAssemblyISD::VEC_SHR_U;
1924     break;
1925   default:
1926     llvm_unreachable("unexpected opcode");
1927   }
1928 
1929   return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(0), ShiftVal);
1930 }
1931 
1932 //===----------------------------------------------------------------------===//
1933 //   Custom DAG combine hooks
1934 //===----------------------------------------------------------------------===//
1935 static SDValue
1936 performVECTOR_SHUFFLECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
1937   auto &DAG = DCI.DAG;
1938   auto Shuffle = cast<ShuffleVectorSDNode>(N);
1939 
1940   // Hoist vector bitcasts that don't change the number of lanes out of unary
1941   // shuffles, where they are less likely to get in the way of other combines.
1942   // (shuffle (vNxT1 (bitcast (vNxT0 x))), undef, mask) ->
1943   //  (vNxT1 (bitcast (vNxT0 (shuffle x, undef, mask))))
1944   SDValue Bitcast = N->getOperand(0);
1945   if (Bitcast.getOpcode() != ISD::BITCAST)
1946     return SDValue();
1947   if (!N->getOperand(1).isUndef())
1948     return SDValue();
1949   SDValue CastOp = Bitcast.getOperand(0);
1950   MVT SrcType = CastOp.getSimpleValueType();
1951   MVT DstType = Bitcast.getSimpleValueType();
1952   if (!SrcType.is128BitVector() ||
1953       SrcType.getVectorNumElements() != DstType.getVectorNumElements())
1954     return SDValue();
1955   SDValue NewShuffle = DAG.getVectorShuffle(
1956       SrcType, SDLoc(N), CastOp, DAG.getUNDEF(SrcType), Shuffle->getMask());
1957   return DAG.getBitcast(DstType, NewShuffle);
1958 }
1959 
1960 static SDValue performVectorWidenCombine(SDNode *N,
1961                                          TargetLowering::DAGCombinerInfo &DCI) {
1962   auto &DAG = DCI.DAG;
1963   assert(N->getOpcode() == ISD::SIGN_EXTEND ||
1964          N->getOpcode() == ISD::ZERO_EXTEND);
1965 
1966   // Combine ({s,z}ext (extract_subvector src, i)) into a widening operation if
1967   // possible before the extract_subvector can be expanded.
1968   auto Extract = N->getOperand(0);
1969   if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR)
1970     return SDValue();
1971   auto Source = Extract.getOperand(0);
1972   auto *IndexNode = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
1973   if (IndexNode == nullptr)
1974     return SDValue();
1975   auto Index = IndexNode->getZExtValue();
1976 
1977   // Only v8i8 and v4i16 extracts can be widened, and only if the extracted
1978   // subvector is the low or high half of its source.
1979   EVT ResVT = N->getValueType(0);
1980   if (ResVT == MVT::v8i16) {
1981     if (Extract.getValueType() != MVT::v8i8 ||
1982         Source.getValueType() != MVT::v16i8 || (Index != 0 && Index != 8))
1983       return SDValue();
1984   } else if (ResVT == MVT::v4i32) {
1985     if (Extract.getValueType() != MVT::v4i16 ||
1986         Source.getValueType() != MVT::v8i16 || (Index != 0 && Index != 4))
1987       return SDValue();
1988   } else {
1989     return SDValue();
1990   }
1991 
1992   bool IsSext = N->getOpcode() == ISD::SIGN_EXTEND;
1993   bool IsLow = Index == 0;
1994 
1995   unsigned Op = IsSext ? (IsLow ? WebAssemblyISD::WIDEN_LOW_S
1996                                 : WebAssemblyISD::WIDEN_HIGH_S)
1997                        : (IsLow ? WebAssemblyISD::WIDEN_LOW_U
1998                                 : WebAssemblyISD::WIDEN_HIGH_U);
1999 
2000   return DAG.getNode(Op, SDLoc(N), ResVT, Source);
2001 }
2002 
2003 SDValue
2004 WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
2005                                              DAGCombinerInfo &DCI) const {
2006   switch (N->getOpcode()) {
2007   default:
2008     return SDValue();
2009   case ISD::VECTOR_SHUFFLE:
2010     return performVECTOR_SHUFFLECombine(N, DCI);
2011   case ISD::SIGN_EXTEND:
2012   case ISD::ZERO_EXTEND:
2013     return performVectorWidenCombine(N, DCI);
2014   }
2015 }
2016