xref: /freebsd/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp (revision 770cf0a5f02dc8983a89c6568d741fbc25baa999)
1 //=- WebAssemblyISelLowering.cpp - WebAssembly DAG Lowering Implementation -==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements the WebAssemblyTargetLowering class.
11 ///
12 //===----------------------------------------------------------------------===//
13 
14 #include "WebAssemblyISelLowering.h"
15 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
16 #include "Utils/WebAssemblyTypeUtilities.h"
17 #include "WebAssemblyMachineFunctionInfo.h"
18 #include "WebAssemblySubtarget.h"
19 #include "WebAssemblyTargetMachine.h"
20 #include "WebAssemblyUtilities.h"
21 #include "llvm/CodeGen/CallingConvLower.h"
22 #include "llvm/CodeGen/MachineFrameInfo.h"
23 #include "llvm/CodeGen/MachineInstrBuilder.h"
24 #include "llvm/CodeGen/MachineJumpTableInfo.h"
25 #include "llvm/CodeGen/MachineModuleInfo.h"
26 #include "llvm/CodeGen/MachineRegisterInfo.h"
27 #include "llvm/CodeGen/SDPatternMatch.h"
28 #include "llvm/CodeGen/SelectionDAG.h"
29 #include "llvm/CodeGen/SelectionDAGNodes.h"
30 #include "llvm/IR/DiagnosticInfo.h"
31 #include "llvm/IR/DiagnosticPrinter.h"
32 #include "llvm/IR/Function.h"
33 #include "llvm/IR/IntrinsicInst.h"
34 #include "llvm/IR/Intrinsics.h"
35 #include "llvm/IR/IntrinsicsWebAssembly.h"
36 #include "llvm/Support/ErrorHandling.h"
37 #include "llvm/Support/KnownBits.h"
38 #include "llvm/Support/MathExtras.h"
39 #include "llvm/Target/TargetOptions.h"
40 using namespace llvm;
41 
42 #define DEBUG_TYPE "wasm-lower"
43 
/// Construct the WebAssembly target lowering. This registers the legal value
/// types and their register classes for the enabled feature set, and records
/// which SelectionDAG operations are Legal, must be Custom-lowered by this
/// target, or should be Expanded by the legalizer.
WebAssemblyTargetLowering::WebAssemblyTargetLowering(
    const TargetMachine &TM, const WebAssemblySubtarget &STI)
    : TargetLowering(TM), Subtarget(&STI) {
  // Pointer width follows the memory model (wasm32 vs. wasm64).
  auto MVTPtr = Subtarget->hasAddr64() ? MVT::i64 : MVT::i32;

  // Booleans always contain 0 or 1.
  setBooleanContents(ZeroOrOneBooleanContent);
  // Except in SIMD vectors, where lanes are all-zeros or all-ones.
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
  // We don't know the microarchitecture here, so just reduce register pressure.
  setSchedulingPreference(Sched::RegPressure);
  // Tell ISel that we have a stack pointer.
  setStackPointerRegisterToSaveRestore(
      Subtarget->hasAddr64() ? WebAssembly::SP64 : WebAssembly::SP32);
  // Set up the register classes.
  addRegisterClass(MVT::i32, &WebAssembly::I32RegClass);
  addRegisterClass(MVT::i64, &WebAssembly::I64RegClass);
  addRegisterClass(MVT::f32, &WebAssembly::F32RegClass);
  addRegisterClass(MVT::f64, &WebAssembly::F64RegClass);
  if (Subtarget->hasSIMD128()) {
    addRegisterClass(MVT::v16i8, &WebAssembly::V128RegClass);
    addRegisterClass(MVT::v8i16, &WebAssembly::V128RegClass);
    addRegisterClass(MVT::v4i32, &WebAssembly::V128RegClass);
    addRegisterClass(MVT::v4f32, &WebAssembly::V128RegClass);
    addRegisterClass(MVT::v2i64, &WebAssembly::V128RegClass);
    addRegisterClass(MVT::v2f64, &WebAssembly::V128RegClass);
  }
  if (Subtarget->hasFP16()) {
    addRegisterClass(MVT::v8f16, &WebAssembly::V128RegClass);
  }
  if (Subtarget->hasReferenceTypes()) {
    addRegisterClass(MVT::externref, &WebAssembly::EXTERNREFRegClass);
    addRegisterClass(MVT::funcref, &WebAssembly::FUNCREFRegClass);
    if (Subtarget->hasExceptionHandling()) {
      addRegisterClass(MVT::exnref, &WebAssembly::EXNREFRegClass);
    }
  }
  // Compute derived properties from the register classes.
  computeRegisterProperties(Subtarget->getRegisterInfo());

  // Transform loads and stores to pointers in address space 1 to loads and
  // stores to WebAssembly global variables, outside linear memory.
  for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64}) {
    setOperationAction(ISD::LOAD, T, Custom);
    setOperationAction(ISD::STORE, T, Custom);
  }
  if (Subtarget->hasSIMD128()) {
    for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
                   MVT::v2f64}) {
      setOperationAction(ISD::LOAD, T, Custom);
      setOperationAction(ISD::STORE, T, Custom);
    }
  }
  if (Subtarget->hasFP16()) {
    setOperationAction(ISD::LOAD, MVT::v8f16, Custom);
    setOperationAction(ISD::STORE, MVT::v8f16, Custom);
  }
  if (Subtarget->hasReferenceTypes()) {
    // We need custom load and store lowering for both externref, funcref and
    // Other. The MVT::Other here represents tables of reference types.
    for (auto T : {MVT::externref, MVT::funcref, MVT::Other}) {
      setOperationAction(ISD::LOAD, T, Custom);
      setOperationAction(ISD::STORE, T, Custom);
    }
  }

  // Address-forming nodes need target-specific lowering (wasm has no generic
  // relocations in the ISA; these become global.get / const / etc.).
  setOperationAction(ISD::GlobalAddress, MVTPtr, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVTPtr, Custom);
  setOperationAction(ISD::ExternalSymbol, MVTPtr, Custom);
  setOperationAction(ISD::JumpTable, MVTPtr, Custom);
  setOperationAction(ISD::BlockAddress, MVTPtr, Custom);
  setOperationAction(ISD::BRIND, MVT::Other, Custom);
  setOperationAction(ISD::CLEAR_CACHE, MVT::Other, Custom);

  // Take the default expansion for va_arg, va_copy, and va_end. There is no
  // default action for va_start, so we do that custom.
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);

  for (auto T : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64, MVT::v8f16}) {
    // v8f16 only participates when the fp16 feature is enabled.
    if (!Subtarget->hasFP16() && T == MVT::v8f16) {
      continue;
    }
    // Don't expand the floating-point types to constant pools.
    setOperationAction(ISD::ConstantFP, T, Legal);
    // Expand floating-point comparisons.
    for (auto CC : {ISD::SETO, ISD::SETUO, ISD::SETUEQ, ISD::SETONE,
                    ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE})
      setCondCodeAction(CC, T, Expand);
    // Expand floating-point library function operators.
    for (auto Op :
         {ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM, ISD::FMA})
      setOperationAction(Op, T, Expand);
    // Note supported floating-point library function operators that otherwise
    // default to expand.
    for (auto Op : {ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FNEARBYINT,
                    ISD::FRINT, ISD::FROUNDEVEN})
      setOperationAction(Op, T, Legal);
    // Support minimum and maximum, which otherwise default to expand.
    setOperationAction(ISD::FMINIMUM, T, Legal);
    setOperationAction(ISD::FMAXIMUM, T, Legal);
    // When experimental v8f16 support is enabled these instructions don't need
    // to be expanded.
    if (T != MVT::v8f16) {
      setOperationAction(ISD::FP16_TO_FP, T, Expand);
      setOperationAction(ISD::FP_TO_FP16, T, Expand);
    }
    // No scalar f16 loads/stores in the base ISA: extend/truncate via libcall.
    setLoadExtAction(ISD::EXTLOAD, T, MVT::f16, Expand);
    setTruncStoreAction(T, MVT::f16, Expand);
  }

  // Expand unavailable integer operations.
  for (auto Op :
       {ISD::BSWAP, ISD::SMUL_LOHI, ISD::UMUL_LOHI, ISD::MULHS, ISD::MULHU,
        ISD::SDIVREM, ISD::UDIVREM, ISD::SHL_PARTS, ISD::SRA_PARTS,
        ISD::SRL_PARTS, ISD::ADDC, ISD::ADDE, ISD::SUBC, ISD::SUBE}) {
    for (auto T : {MVT::i32, MVT::i64})
      setOperationAction(Op, T, Expand);
    if (Subtarget->hasSIMD128())
      for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
        setOperationAction(Op, T, Expand);
  }

  // With the wide-arithmetic proposal, 128-bit add/sub and 64x64->128
  // multiplies map to dedicated instructions.
  if (Subtarget->hasWideArithmetic()) {
    setOperationAction(ISD::ADD, MVT::i128, Custom);
    setOperationAction(ISD::SUB, MVT::i128, Custom);
    setOperationAction(ISD::SMUL_LOHI, MVT::i64, Custom);
    setOperationAction(ISD::UMUL_LOHI, MVT::i64, Custom);
    setOperationAction(ISD::UADDO, MVT::i64, Custom);
  }

  if (Subtarget->hasNontrappingFPToInt())
    for (auto Op : {ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT})
      for (auto T : {MVT::i32, MVT::i64})
        setOperationAction(Op, T, Custom);

  // SIMD-specific configuration
  if (Subtarget->hasSIMD128()) {

    // Combine partial.reduce.add before legalization gets confused.
    setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);

    // Combine wide-vector muls, with extend inputs, to extmul_half.
    setTargetDAGCombine(ISD::MUL);

    // Combine vector mask reductions into alltrue/anytrue
    setTargetDAGCombine(ISD::SETCC);

    // Convert vector to integer bitcasts to bitmask
    setTargetDAGCombine(ISD::BITCAST);

    // Hoist bitcasts out of shuffles
    setTargetDAGCombine(ISD::VECTOR_SHUFFLE);

    // Combine extends of extract_subvectors into widening ops
    setTargetDAGCombine({ISD::SIGN_EXTEND, ISD::ZERO_EXTEND});

    // Combine int_to_fp or fp_extend of extract_vectors and vice versa into
    // conversions ops
    setTargetDAGCombine({ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_EXTEND,
                         ISD::EXTRACT_SUBVECTOR});

    // Combine fp_to_{s,u}int_sat or fp_round of concat_vectors or vice versa
    // into conversion ops
    setTargetDAGCombine({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT,
                         ISD::FP_ROUND, ISD::CONCAT_VECTORS});

    setTargetDAGCombine(ISD::TRUNCATE);

    // Support saturating add/sub for i8x16 and i16x8
    for (auto Op : {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT})
      for (auto T : {MVT::v16i8, MVT::v8i16})
        setOperationAction(Op, T, Legal);

    // Support integer abs
    for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
      setOperationAction(ISD::ABS, T, Legal);

    // Custom lower BUILD_VECTORs to minimize number of replace_lanes
    for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
                   MVT::v2f64})
      setOperationAction(ISD::BUILD_VECTOR, T, Custom);

    // NOTE(review): keyed on the scalar f16 type here, unlike the vector
    // types above — confirm this is intentional for the fp16 proposal.
    if (Subtarget->hasFP16())
      setOperationAction(ISD::BUILD_VECTOR, MVT::f16, Custom);

    // We have custom shuffle lowering to expose the shuffle mask
    for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
                   MVT::v2f64})
      setOperationAction(ISD::VECTOR_SHUFFLE, T, Custom);

    if (Subtarget->hasFP16())
      setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8f16, Custom);

    // Support splatting
    for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
                   MVT::v2f64})
      setOperationAction(ISD::SPLAT_VECTOR, T, Legal);

    // Custom lowering since wasm shifts must have a scalar shift amount
    for (auto Op : {ISD::SHL, ISD::SRA, ISD::SRL})
      for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
        setOperationAction(Op, T, Custom);

    // Custom lower lane accesses to expand out variable indices
    for (auto Op : {ISD::EXTRACT_VECTOR_ELT, ISD::INSERT_VECTOR_ELT})
      for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
                     MVT::v2f64})
        setOperationAction(Op, T, Custom);

    // There is no i8x16.mul instruction
    setOperationAction(ISD::MUL, MVT::v16i8, Expand);

    // There is no vector conditional select instruction
    for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
                   MVT::v2f64})
      setOperationAction(ISD::SELECT_CC, T, Expand);

    // Expand integer operations supported for scalars but not SIMD
    for (auto Op :
         {ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM, ISD::ROTL, ISD::ROTR})
      for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
        setOperationAction(Op, T, Expand);

    // But we do have integer min and max operations
    for (auto Op : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
      for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
        setOperationAction(Op, T, Legal);

    // And we have popcnt for i8x16. It can be used to expand ctlz/cttz.
    setOperationAction(ISD::CTPOP, MVT::v16i8, Legal);
    setOperationAction(ISD::CTLZ, MVT::v16i8, Expand);
    setOperationAction(ISD::CTTZ, MVT::v16i8, Expand);

    // Custom lower bit counting operations for other types to scalarize them.
    for (auto Op : {ISD::CTLZ, ISD::CTTZ, ISD::CTPOP})
      for (auto T : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
        setOperationAction(Op, T, Custom);

    // Expand float operations supported for scalars but not SIMD
    for (auto Op : {ISD::FCOPYSIGN, ISD::FLOG, ISD::FLOG2, ISD::FLOG10,
                    ISD::FEXP, ISD::FEXP2})
      for (auto T : {MVT::v4f32, MVT::v2f64})
        setOperationAction(Op, T, Expand);

    // Unsigned comparison operations are unavailable for i64x2 vectors.
    for (auto CC : {ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE})
      setCondCodeAction(CC, MVT::v2i64, Custom);

    // 64x2 conversions are not in the spec
    for (auto Op :
         {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT})
      for (auto T : {MVT::v2i64, MVT::v2f64})
        setOperationAction(Op, T, Expand);

    // But saturating fp_to_int conversions are
    for (auto Op : {ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}) {
      setOperationAction(Op, MVT::v4i32, Custom);
      if (Subtarget->hasFP16()) {
        setOperationAction(Op, MVT::v8i16, Custom);
      }
    }

    // Support vector extending
    for (auto T : MVT::integer_fixedlen_vector_valuetypes()) {
      setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, T, Custom);
      setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, T, Custom);
    }
  }

  // As a special case, these operators use the type to mean the type to
  // sign-extend from.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  if (!Subtarget->hasSignExt()) {
    // Sign extends are legal only when extending a vector extract
    auto Action = Subtarget->hasSIMD128() ? Custom : Expand;
    for (auto T : {MVT::i8, MVT::i16, MVT::i32})
      setOperationAction(ISD::SIGN_EXTEND_INREG, T, Action);
  }
  for (auto T : MVT::integer_fixedlen_vector_valuetypes())
    setOperationAction(ISD::SIGN_EXTEND_INREG, T, Expand);

  // Dynamic stack allocation: use the default expansion.
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVTPtr, Expand);

  setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
  setOperationAction(ISD::FrameIndex, MVT::i64, Custom);
  setOperationAction(ISD::CopyToReg, MVT::Other, Custom);

  // Expand these forms; we pattern-match the forms that we can handle in isel.
  for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64})
    for (auto Op : {ISD::BR_CC, ISD::SELECT_CC})
      setOperationAction(Op, T, Expand);

  // We have custom switch handling.
  setOperationAction(ISD::BR_JT, MVT::Other, Custom);

  // WebAssembly doesn't have:
  //  - Floating-point extending loads.
  //  - Floating-point truncating stores.
  //  - i1 extending loads.
  //  - truncating SIMD stores and most extending loads
  setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  for (auto T : MVT::integer_valuetypes())
    for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
      setLoadExtAction(Ext, T, MVT::i1, Promote);
  if (Subtarget->hasSIMD128()) {
    for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32,
                   MVT::v2f64}) {
      for (auto MemT : MVT::fixedlen_vector_valuetypes()) {
        if (MVT(T) != MemT) {
          setTruncStoreAction(T, MemT, Expand);
          for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
            setLoadExtAction(Ext, T, MemT, Expand);
        }
      }
    }
    // But some vector extending loads are legal
    for (auto Ext : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}) {
      setLoadExtAction(Ext, MVT::v8i16, MVT::v8i8, Legal);
      setLoadExtAction(Ext, MVT::v4i32, MVT::v4i16, Legal);
      setLoadExtAction(Ext, MVT::v2i64, MVT::v2i32, Legal);
    }
    setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Legal);
  }

  // Don't do anything clever with build_pairs
  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);

  // Trap lowers to wasm unreachable
  setOperationAction(ISD::TRAP, MVT::Other, Legal);
  setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);

  // Exception handling intrinsics
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);

  setMaxAtomicSizeInBitsSupported(64);

  // Always convert switches to br_tables unless there is only one case, which
  // is equivalent to a simple branch. This reduces code size for wasm, and we
  // defer possible jump table optimizations to the VM.
  setMinimumJumpTableEntries(2);
}
394 
395 MVT WebAssemblyTargetLowering::getPointerTy(const DataLayout &DL,
396                                             uint32_t AS) const {
397   if (AS == WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_EXTERNREF)
398     return MVT::externref;
399   if (AS == WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_FUNCREF)
400     return MVT::funcref;
401   return TargetLowering::getPointerTy(DL, AS);
402 }
403 
404 MVT WebAssemblyTargetLowering::getPointerMemTy(const DataLayout &DL,
405                                                uint32_t AS) const {
406   if (AS == WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_EXTERNREF)
407     return MVT::externref;
408   if (AS == WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_FUNCREF)
409     return MVT::funcref;
410   return TargetLowering::getPointerMemTy(DL, AS);
411 }
412 
413 bool WebAssemblyTargetLowering::shouldExpandPartialReductionIntrinsic(
414     const IntrinsicInst *I) const {
415   if (I->getIntrinsicID() != Intrinsic::experimental_vector_partial_reduce_add)
416     return true;
417 
418   EVT VT = EVT::getEVT(I->getType());
419   auto Op1 = I->getOperand(1);
420 
421   if (auto *InputInst = dyn_cast<Instruction>(Op1)) {
422     if (InstructionOpcodeToISD(InputInst->getOpcode()) != ISD::MUL)
423       return true;
424 
425     if (isa<Instruction>(InputInst->getOperand(0)) &&
426         isa<Instruction>(InputInst->getOperand(1))) {
427       // dot only supports signed inputs but also support lowering unsigned.
428       if (cast<Instruction>(InputInst->getOperand(0))->getOpcode() !=
429           cast<Instruction>(InputInst->getOperand(1))->getOpcode())
430         return true;
431 
432       EVT Op1VT = EVT::getEVT(Op1->getType());
433       if (Op1VT.getVectorElementType() == VT.getVectorElementType() &&
434           ((VT.getVectorElementCount() * 2 == Op1VT.getVectorElementCount()) ||
435            (VT.getVectorElementCount() * 4 == Op1VT.getVectorElementCount())))
436         return false;
437     }
438   }
439   return true;
440 }
441 
442 TargetLowering::AtomicExpansionKind
443 WebAssemblyTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
444   // We have wasm instructions for these
445   switch (AI->getOperation()) {
446   case AtomicRMWInst::Add:
447   case AtomicRMWInst::Sub:
448   case AtomicRMWInst::And:
449   case AtomicRMWInst::Or:
450   case AtomicRMWInst::Xor:
451   case AtomicRMWInst::Xchg:
452     return AtomicExpansionKind::None;
453   default:
454     break;
455   }
456   return AtomicExpansionKind::CmpXChg;
457 }
458 
459 bool WebAssemblyTargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
460   // Implementation copied from X86TargetLowering.
461   unsigned Opc = VecOp.getOpcode();
462 
463   // Assume target opcodes can't be scalarized.
464   // TODO - do we have any exceptions?
465   if (Opc >= ISD::BUILTIN_OP_END || !isBinOp(Opc))
466     return false;
467 
468   // If the vector op is not supported, try to convert to scalar.
469   EVT VecVT = VecOp.getValueType();
470   if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
471     return true;
472 
473   // If the vector op is supported, but the scalar op is not, the transform may
474   // not be worthwhile.
475   EVT ScalarVT = VecVT.getScalarType();
476   return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
477 }
478 
// Create the WebAssembly-specific FastISel instance used for fast instruction
// selection (e.g. at -O0); simply delegates to the target's factory.
FastISel *WebAssemblyTargetLowering::createFastISel(
    FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const {
  return WebAssembly::createFastISel(FuncInfo, LibInfo);
}
483 
484 MVT WebAssemblyTargetLowering::getScalarShiftAmountTy(const DataLayout & /*DL*/,
485                                                       EVT VT) const {
486   unsigned BitWidth = NextPowerOf2(VT.getSizeInBits() - 1);
487   if (BitWidth > 1 && BitWidth < 8)
488     BitWidth = 8;
489 
490   if (BitWidth > 64) {
491     // The shift will be lowered to a libcall, and compiler-rt libcalls expect
492     // the count to be an i32.
493     BitWidth = 32;
494     assert(BitWidth >= Log2_32_Ceil(VT.getSizeInBits()) &&
495            "32-bit shift counts ought to be enough for anyone");
496   }
497 
498   MVT Result = MVT::getIntegerVT(BitWidth);
499   assert(Result != MVT::INVALID_SIMPLE_VALUE_TYPE &&
500          "Unable to represent scalar shift amount type");
501   return Result;
502 }
503 
// Lower an fp-to-int conversion operator from the LLVM opcode, which has an
// undefined result on invalid/overflow, to the WebAssembly opcode, which
// traps on invalid/overflow. A CFG diamond is built around the trapping
// conversion: the in-range path executes the real conversion, and the
// out-of-range path produces a substitute constant (any value is acceptable
// there, since the LLVM result is undefined). Returns the block that ends
// the diamond, where subsequent instructions should be emitted.
static MachineBasicBlock *LowerFPToInt(MachineInstr &MI, DebugLoc DL,
                                       MachineBasicBlock *BB,
                                       const TargetInstrInfo &TII,
                                       bool IsUnsigned, bool Int64,
                                       bool Float64, unsigned LoweredOpcode) {
  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();

  Register OutReg = MI.getOperand(0).getReg();
  Register InReg = MI.getOperand(1).getReg();

  // Pick the opcodes matching the source float width and dest int width.
  unsigned Abs = Float64 ? WebAssembly::ABS_F64 : WebAssembly::ABS_F32;
  unsigned FConst = Float64 ? WebAssembly::CONST_F64 : WebAssembly::CONST_F32;
  unsigned LT = Float64 ? WebAssembly::LT_F64 : WebAssembly::LT_F32;
  unsigned GE = Float64 ? WebAssembly::GE_F64 : WebAssembly::GE_F32;
  unsigned IConst = Int64 ? WebAssembly::CONST_I64 : WebAssembly::CONST_I32;
  unsigned Eqz = WebAssembly::EQZ_I32;
  unsigned And = WebAssembly::AND_I32;
  // CmpVal is the exclusive upper bound for the in-range check: -INT_MIN
  // (i.e. 2^(N-1)) for signed, and twice that (2^N) for unsigned.
  int64_t Limit = Int64 ? INT64_MIN : INT32_MIN;
  int64_t Substitute = IsUnsigned ? 0 : Limit;
  double CmpVal = IsUnsigned ? -(double)Limit * 2.0 : -(double)Limit;
  auto &Context = BB->getParent()->getFunction().getContext();
  Type *Ty = Float64 ? Type::getDoubleTy(Context) : Type::getFloatTy(Context);

  const BasicBlock *LLVMBB = BB->getBasicBlock();
  MachineFunction *F = BB->getParent();
  MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB);
  MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(LLVMBB);
  MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB);

  MachineFunction::iterator It = ++BB->getIterator();
  F->insert(It, FalseMBB);
  F->insert(It, TrueMBB);
  F->insert(It, DoneMBB);

  // Transfer the remainder of BB and its successor edges to DoneMBB.
  DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end());
  DoneMBB->transferSuccessorsAndUpdatePHIs(BB);

  BB->addSuccessor(TrueMBB);
  BB->addSuccessor(FalseMBB);
  TrueMBB->addSuccessor(DoneMBB);
  FalseMBB->addSuccessor(DoneMBB);

  unsigned Tmp0, Tmp1, CmpReg, EqzReg, FalseReg, TrueReg;
  Tmp0 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
  Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
  CmpReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
  EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
  FalseReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg));
  TrueReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg));

  MI.eraseFromParent();
  // For signed numbers, we can do a single comparison to determine whether
  // fabs(x) is within range.
  if (IsUnsigned) {
    Tmp0 = InReg;
  } else {
    BuildMI(BB, DL, TII.get(Abs), Tmp0).addReg(InReg);
  }
  // CmpReg = (Tmp0 < CmpVal), i.e. input magnitude is in range.
  BuildMI(BB, DL, TII.get(FConst), Tmp1)
      .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, CmpVal)));
  BuildMI(BB, DL, TII.get(LT), CmpReg).addReg(Tmp0).addReg(Tmp1);

  // For unsigned numbers, we have to do a separate comparison with zero.
  if (IsUnsigned) {
    Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
    Register SecondCmpReg =
        MRI.createVirtualRegister(&WebAssembly::I32RegClass);
    Register AndReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
    BuildMI(BB, DL, TII.get(FConst), Tmp1)
        .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, 0.0)));
    BuildMI(BB, DL, TII.get(GE), SecondCmpReg).addReg(Tmp0).addReg(Tmp1);
    BuildMI(BB, DL, TII.get(And), AndReg).addReg(CmpReg).addReg(SecondCmpReg);
    CmpReg = AndReg;
  }

  // EqzReg is nonzero iff the input is out of range (NaN also fails CmpReg).
  BuildMI(BB, DL, TII.get(Eqz), EqzReg).addReg(CmpReg);

  // Create the CFG diamond to select between doing the conversion or using
  // the substitute value.
  BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(TrueMBB).addReg(EqzReg);
  BuildMI(FalseMBB, DL, TII.get(LoweredOpcode), FalseReg).addReg(InReg);
  BuildMI(FalseMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB);
  BuildMI(TrueMBB, DL, TII.get(IConst), TrueReg).addImm(Substitute);
  BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(TargetOpcode::PHI), OutReg)
      .addReg(FalseReg)
      .addMBB(FalseMBB)
      .addReg(TrueReg)
      .addMBB(TrueMBB);

  return DoneMBB;
}
599 
// Lower a `MEMCPY` instruction into a CFG triangle around a `MEMORY_COPY`
// instruction to handle the zero-length case: the wasm `memory.copy`
// instruction traps on out-of-bounds addresses even when the length is zero,
// so the copy is branched over entirely when `Len` is zero. Returns the
// block following the triangle.
static MachineBasicBlock *LowerMemcpy(MachineInstr &MI, DebugLoc DL,
                                      MachineBasicBlock *BB,
                                      const TargetInstrInfo &TII, bool Int64) {
  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();

  MachineOperand DstMem = MI.getOperand(0);
  MachineOperand SrcMem = MI.getOperand(1);
  MachineOperand Dst = MI.getOperand(2);
  MachineOperand Src = MI.getOperand(3);
  MachineOperand Len = MI.getOperand(4);

  // We're going to add an extra use to `Len` to test if it's zero; that
  // use shouldn't be a kill, even if the original use is.
  MachineOperand NoKillLen = Len;
  NoKillLen.setIsKill(false);

  // Decide on which `MachineInstr` opcode we're going to use.
  unsigned Eqz = Int64 ? WebAssembly::EQZ_I64 : WebAssembly::EQZ_I32;
  unsigned MemoryCopy =
      Int64 ? WebAssembly::MEMORY_COPY_A64 : WebAssembly::MEMORY_COPY_A32;

  // Create two new basic blocks; one for the new `memory.copy` that we can
  // branch over, and one for the rest of the instructions after the original
  // `memory.copy`.
  const BasicBlock *LLVMBB = BB->getBasicBlock();
  MachineFunction *F = BB->getParent();
  MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB);
  MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB);

  MachineFunction::iterator It = ++BB->getIterator();
  F->insert(It, TrueMBB);
  F->insert(It, DoneMBB);

  // Transfer the remainder of BB and its successor edges to DoneMBB.
  DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end());
  DoneMBB->transferSuccessorsAndUpdatePHIs(BB);

  // Connect the CFG edges.
  BB->addSuccessor(TrueMBB);
  BB->addSuccessor(DoneMBB);
  TrueMBB->addSuccessor(DoneMBB);

  // Create a virtual register for the `Eqz` result.
  unsigned EqzReg;
  EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);

  // Erase the original `memory.copy`.
  MI.eraseFromParent();

  // Test if `Len` is zero.
  BuildMI(BB, DL, TII.get(Eqz), EqzReg).add(NoKillLen);

  // Insert a new `memory.copy`.
  BuildMI(TrueMBB, DL, TII.get(MemoryCopy))
      .add(DstMem)
      .add(SrcMem)
      .add(Dst)
      .add(Src)
      .add(Len);

  // Create the CFG triangle: skip the copy when `Len` is zero.
  BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(DoneMBB).addReg(EqzReg);
  BuildMI(TrueMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB);

  return DoneMBB;
}
668 
// Lower a `MEMSET` instruction into a CFG triangle around a `MEMORY_FILL`
// instruction to handle the zero-length case: the wasm `memory.fill`
// instruction traps on out-of-bounds addresses even when the length is zero,
// so the fill is branched over entirely when `Len` is zero. Returns the
// block following the triangle.
static MachineBasicBlock *LowerMemset(MachineInstr &MI, DebugLoc DL,
                                      MachineBasicBlock *BB,
                                      const TargetInstrInfo &TII, bool Int64) {
  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();

  MachineOperand Mem = MI.getOperand(0);
  MachineOperand Dst = MI.getOperand(1);
  MachineOperand Val = MI.getOperand(2);
  MachineOperand Len = MI.getOperand(3);

  // We're going to add an extra use to `Len` to test if it's zero; that
  // use shouldn't be a kill, even if the original use is.
  MachineOperand NoKillLen = Len;
  NoKillLen.setIsKill(false);

  // Decide on which `MachineInstr` opcode we're going to use.
  unsigned Eqz = Int64 ? WebAssembly::EQZ_I64 : WebAssembly::EQZ_I32;
  unsigned MemoryFill =
      Int64 ? WebAssembly::MEMORY_FILL_A64 : WebAssembly::MEMORY_FILL_A32;

  // Create two new basic blocks; one for the new `memory.fill` that we can
  // branch over, and one for the rest of the instructions after the original
  // `memory.fill`.
  const BasicBlock *LLVMBB = BB->getBasicBlock();
  MachineFunction *F = BB->getParent();
  MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB);
  MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB);

  MachineFunction::iterator It = ++BB->getIterator();
  F->insert(It, TrueMBB);
  F->insert(It, DoneMBB);

  // Transfer the remainder of BB and its successor edges to DoneMBB.
  DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end());
  DoneMBB->transferSuccessorsAndUpdatePHIs(BB);

  // Connect the CFG edges.
  BB->addSuccessor(TrueMBB);
  BB->addSuccessor(DoneMBB);
  TrueMBB->addSuccessor(DoneMBB);

  // Create a virtual register for the `Eqz` result.
  unsigned EqzReg;
  EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);

  // Erase the original `memory.fill`.
  MI.eraseFromParent();

  // Test if `Len` is zero.
  BuildMI(BB, DL, TII.get(Eqz), EqzReg).add(NoKillLen);

  // Insert a new `memory.fill`.
  BuildMI(TrueMBB, DL, TII.get(MemoryFill)).add(Mem).add(Dst).add(Val).add(Len);

  // Create the CFG triangle: skip the fill when `Len` is zero.
  BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(DoneMBB).addReg(EqzReg);
  BuildMI(TrueMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB);

  return DoneMBB;
}
731 
// Lower a CALL_PARAMS/CALL_RESULTS (or RET_CALL_RESULTS) pseudo-instruction
// pair into a single real call instruction (CALL, RET_CALL, CALL_INDIRECT, or
// RET_CALL_INDIRECT), with the defs of CallResults followed by the uses of
// CallParams as its operand list. Funcref calls are routed through slot 0 of
// __funcref_call_table and the slot is cleared again after the call.
static MachineBasicBlock *
LowerCallResults(MachineInstr &CallResults, DebugLoc DL, MachineBasicBlock *BB,
                 const WebAssemblySubtarget *Subtarget,
                 const TargetInstrInfo &TII) {
  // CALL_PARAMS is emitted immediately before CALL_RESULTS; rely on that.
  MachineInstr &CallParams = *CallResults.getPrevNode();
  assert(CallParams.getOpcode() == WebAssembly::CALL_PARAMS);
  assert(CallResults.getOpcode() == WebAssembly::CALL_RESULTS ||
         CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS);

  // A register or frame-index callee (rather than a symbol) makes the call
  // indirect.
  bool IsIndirect =
      CallParams.getOperand(0).isReg() || CallParams.getOperand(0).isFI();
  bool IsRetCall = CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS;

  // An indirect call through a FUNCREF-class register is a funcref call,
  // which requires the reference-types feature.
  bool IsFuncrefCall = false;
  if (IsIndirect && CallParams.getOperand(0).isReg()) {
    Register Reg = CallParams.getOperand(0).getReg();
    const MachineFunction *MF = BB->getParent();
    const MachineRegisterInfo &MRI = MF->getRegInfo();
    const TargetRegisterClass *TRC = MRI.getRegClass(Reg);
    IsFuncrefCall = (TRC == &WebAssembly::FUNCREFRegClass);
    assert(!IsFuncrefCall || Subtarget->hasReferenceTypes());
  }

  // Select the real opcode from the indirect/tail-call combination.
  unsigned CallOp;
  if (IsIndirect && IsRetCall) {
    CallOp = WebAssembly::RET_CALL_INDIRECT;
  } else if (IsIndirect) {
    CallOp = WebAssembly::CALL_INDIRECT;
  } else if (IsRetCall) {
    CallOp = WebAssembly::RET_CALL;
  } else {
    CallOp = WebAssembly::CALL;
  }

  MachineFunction &MF = *BB->getParent();
  const MCInstrDesc &MCID = TII.get(CallOp);
  MachineInstrBuilder MIB(MF, MF.CreateMachineInstr(MCID, DL));

  // Move the function pointer to the end of the arguments for indirect calls
  if (IsIndirect) {
    auto FnPtr = CallParams.getOperand(0);
    CallParams.removeOperand(0);

    // For funcrefs, call_indirect is done through __funcref_call_table and the
    // funcref is always installed in slot 0 of the table, therefore instead of
    // having the function pointer added at the end of the params list, a zero
    // (the index in
    // __funcref_call_table is added).
    if (IsFuncrefCall) {
      Register RegZero =
          MF.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass);
      MachineInstrBuilder MIBC0 =
          BuildMI(MF, DL, TII.get(WebAssembly::CONST_I32), RegZero).addImm(0);

      BB->insert(CallResults.getIterator(), MIBC0);
      MachineInstrBuilder(MF, CallParams).addReg(RegZero);
    } else
      CallParams.addOperand(FnPtr);
  }

  // Results of the call come first in the new instruction's operand list.
  for (auto Def : CallResults.defs())
    MIB.add(Def);

  if (IsIndirect) {
    // Placeholder for the type index.
    MIB.addImm(0);
    // The table into which this call_indirect indexes.
    MCSymbolWasm *Table = IsFuncrefCall
                              ? WebAssembly::getOrCreateFuncrefCallTableSymbol(
                                    MF.getContext(), Subtarget)
                              : WebAssembly::getOrCreateFunctionTableSymbol(
                                    MF.getContext(), Subtarget);
    if (Subtarget->hasCallIndirectOverlong()) {
      MIB.addSym(Table);
    } else {
      // For the MVP there is at most one table whose number is 0, but we can't
      // write a table symbol or issue relocations.  Instead we just ensure the
      // table is live and write a zero.
      Table->setNoStrip();
      MIB.addImm(0);
    }
  }

  // The remaining arguments (including, for indirect calls, the function
  // pointer or table index moved above) follow the defs.
  for (auto Use : CallParams.uses())
    MIB.add(Use);

  // Insert the merged call and drop the two pseudos it replaces.
  BB->insert(CallResults.getIterator(), MIB);
  CallParams.eraseFromParent();
  CallResults.eraseFromParent();

  // If this is a funcref call, to avoid hidden GC roots, we need to clear the
  // table slot with ref.null upon call_indirect return.
  //
  // This generates the following code, which comes right after a call_indirect
  // of a funcref:
  //
  //    i32.const 0
  //    ref.null func
  //    table.set __funcref_call_table
  if (IsIndirect && IsFuncrefCall) {
    MCSymbolWasm *Table = WebAssembly::getOrCreateFuncrefCallTableSymbol(
        MF.getContext(), Subtarget);
    Register RegZero =
        MF.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass);
    MachineInstr *Const0 =
        BuildMI(MF, DL, TII.get(WebAssembly::CONST_I32), RegZero).addImm(0);
    BB->insertAfter(MIB.getInstr()->getIterator(), Const0);

    Register RegFuncref =
        MF.getRegInfo().createVirtualRegister(&WebAssembly::FUNCREFRegClass);
    MachineInstr *RefNull =
        BuildMI(MF, DL, TII.get(WebAssembly::REF_NULL_FUNCREF), RegFuncref);
    BB->insertAfter(Const0->getIterator(), RefNull);

    MachineInstr *TableSet =
        BuildMI(MF, DL, TII.get(WebAssembly::TABLE_SET_FUNCREF))
            .addSym(Table)
            .addReg(RegZero)
            .addReg(RegFuncref);
    BB->insertAfter(RefNull->getIterator(), TableSet);
  }

  return BB;
}
856 
857 MachineBasicBlock *WebAssemblyTargetLowering::EmitInstrWithCustomInserter(
858     MachineInstr &MI, MachineBasicBlock *BB) const {
859   const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
860   DebugLoc DL = MI.getDebugLoc();
861 
862   switch (MI.getOpcode()) {
863   default:
864     llvm_unreachable("Unexpected instr type to insert");
865   case WebAssembly::FP_TO_SINT_I32_F32:
866     return LowerFPToInt(MI, DL, BB, TII, false, false, false,
867                         WebAssembly::I32_TRUNC_S_F32);
868   case WebAssembly::FP_TO_UINT_I32_F32:
869     return LowerFPToInt(MI, DL, BB, TII, true, false, false,
870                         WebAssembly::I32_TRUNC_U_F32);
871   case WebAssembly::FP_TO_SINT_I64_F32:
872     return LowerFPToInt(MI, DL, BB, TII, false, true, false,
873                         WebAssembly::I64_TRUNC_S_F32);
874   case WebAssembly::FP_TO_UINT_I64_F32:
875     return LowerFPToInt(MI, DL, BB, TII, true, true, false,
876                         WebAssembly::I64_TRUNC_U_F32);
877   case WebAssembly::FP_TO_SINT_I32_F64:
878     return LowerFPToInt(MI, DL, BB, TII, false, false, true,
879                         WebAssembly::I32_TRUNC_S_F64);
880   case WebAssembly::FP_TO_UINT_I32_F64:
881     return LowerFPToInt(MI, DL, BB, TII, true, false, true,
882                         WebAssembly::I32_TRUNC_U_F64);
883   case WebAssembly::FP_TO_SINT_I64_F64:
884     return LowerFPToInt(MI, DL, BB, TII, false, true, true,
885                         WebAssembly::I64_TRUNC_S_F64);
886   case WebAssembly::FP_TO_UINT_I64_F64:
887     return LowerFPToInt(MI, DL, BB, TII, true, true, true,
888                         WebAssembly::I64_TRUNC_U_F64);
889   case WebAssembly::MEMCPY_A32:
890     return LowerMemcpy(MI, DL, BB, TII, false);
891   case WebAssembly::MEMCPY_A64:
892     return LowerMemcpy(MI, DL, BB, TII, true);
893   case WebAssembly::MEMSET_A32:
894     return LowerMemset(MI, DL, BB, TII, false);
895   case WebAssembly::MEMSET_A64:
896     return LowerMemset(MI, DL, BB, TII, true);
897   case WebAssembly::CALL_RESULTS:
898   case WebAssembly::RET_CALL_RESULTS:
899     return LowerCallResults(MI, DL, BB, Subtarget, TII);
900   }
901 }
902 
// Return the textual name of a WebAssembly-specific DAG node opcode, or
// nullptr if the opcode is not one of ours.
const char *
WebAssemblyTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (static_cast<WebAssemblyISD::NodeType>(Opcode)) {
  case WebAssemblyISD::FIRST_NUMBER:
    break;
  // Expand one case per node type listed in WebAssemblyISD.def, returning
  // the node's identifier prefixed with "WebAssemblyISD::".
#define HANDLE_NODETYPE(NODE)                                                  \
  case WebAssemblyISD::NODE:                                                   \
    return "WebAssemblyISD::" #NODE;
#include "WebAssemblyISD.def"
#undef HANDLE_NODETYPE
  }
  return nullptr;
}
916 
917 std::pair<unsigned, const TargetRegisterClass *>
918 WebAssemblyTargetLowering::getRegForInlineAsmConstraint(
919     const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
920   // First, see if this is a constraint that directly corresponds to a
921   // WebAssembly register class.
922   if (Constraint.size() == 1) {
923     switch (Constraint[0]) {
924     case 'r':
925       assert(VT != MVT::iPTR && "Pointer MVT not expected here");
926       if (Subtarget->hasSIMD128() && VT.isVector()) {
927         if (VT.getSizeInBits() == 128)
928           return std::make_pair(0U, &WebAssembly::V128RegClass);
929       }
930       if (VT.isInteger() && !VT.isVector()) {
931         if (VT.getSizeInBits() <= 32)
932           return std::make_pair(0U, &WebAssembly::I32RegClass);
933         if (VT.getSizeInBits() <= 64)
934           return std::make_pair(0U, &WebAssembly::I64RegClass);
935       }
936       if (VT.isFloatingPoint() && !VT.isVector()) {
937         switch (VT.getSizeInBits()) {
938         case 32:
939           return std::make_pair(0U, &WebAssembly::F32RegClass);
940         case 64:
941           return std::make_pair(0U, &WebAssembly::F64RegClass);
942         default:
943           break;
944         }
945       }
946       break;
947     default:
948       break;
949     }
950   }
951 
952   return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
953 }
954 
955 bool WebAssemblyTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
956   // Assume ctz is a relatively cheap operation.
957   return true;
958 }
959 
960 bool WebAssemblyTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
961   // Assume clz is a relatively cheap operation.
962   return true;
963 }
964 
965 bool WebAssemblyTargetLowering::isLegalAddressingMode(const DataLayout &DL,
966                                                       const AddrMode &AM,
967                                                       Type *Ty, unsigned AS,
968                                                       Instruction *I) const {
969   // WebAssembly offsets are added as unsigned without wrapping. The
970   // isLegalAddressingMode gives us no way to determine if wrapping could be
971   // happening, so we approximate this by accepting only non-negative offsets.
972   if (AM.BaseOffs < 0)
973     return false;
974 
975   // WebAssembly has no scale register operands.
976   if (AM.Scale != 0)
977     return false;
978 
979   // Everything else is legal.
980   return true;
981 }
982 
983 bool WebAssemblyTargetLowering::allowsMisalignedMemoryAccesses(
984     EVT /*VT*/, unsigned /*AddrSpace*/, Align /*Align*/,
985     MachineMemOperand::Flags /*Flags*/, unsigned *Fast) const {
986   // WebAssembly supports unaligned accesses, though it should be declared
987   // with the p2align attribute on loads and stores which do so, and there
988   // may be a performance impact. We tell LLVM they're "fast" because
989   // for the kinds of things that LLVM uses this for (merging adjacent stores
990   // of constants, etc.), WebAssembly implementations will either want the
991   // unaligned access or they'll split anyway.
992   if (Fast)
993     *Fast = 1;
994   return true;
995 }
996 
997 bool WebAssemblyTargetLowering::isIntDivCheap(EVT VT,
998                                               AttributeList Attr) const {
999   // The current thinking is that wasm engines will perform this optimization,
1000   // so we can save on code size.
1001   return true;
1002 }
1003 
1004 bool WebAssemblyTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
1005   EVT ExtT = ExtVal.getValueType();
1006   EVT MemT = cast<LoadSDNode>(ExtVal->getOperand(0))->getValueType(0);
1007   return (ExtT == MVT::v8i16 && MemT == MVT::v8i8) ||
1008          (ExtT == MVT::v4i32 && MemT == MVT::v4i16) ||
1009          (ExtT == MVT::v2i64 && MemT == MVT::v2i32);
1010 }
1011 
1012 bool WebAssemblyTargetLowering::isOffsetFoldingLegal(
1013     const GlobalAddressSDNode *GA) const {
1014   // Wasm doesn't support function addresses with offsets
1015   const GlobalValue *GV = GA->getGlobal();
1016   return isa<Function>(GV) ? false : TargetLowering::isOffsetFoldingLegal(GA);
1017 }
1018 
1019 EVT WebAssemblyTargetLowering::getSetCCResultType(const DataLayout &DL,
1020                                                   LLVMContext &C,
1021                                                   EVT VT) const {
1022   if (VT.isVector())
1023     return VT.changeVectorElementTypeToInteger();
1024 
1025   // So far, all branch instructions in Wasm take an I32 condition.
1026   // The default TargetLowering::getSetCCResultType returns the pointer size,
1027   // which would be useful to reduce instruction counts when testing
1028   // against 64-bit pointers/values if at some point Wasm supports that.
1029   return EVT::getIntegerVT(C, 32);
1030 }
1031 
1032 bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
1033                                                    const CallInst &I,
1034                                                    MachineFunction &MF,
1035                                                    unsigned Intrinsic) const {
1036   switch (Intrinsic) {
1037   case Intrinsic::wasm_memory_atomic_notify:
1038     Info.opc = ISD::INTRINSIC_W_CHAIN;
1039     Info.memVT = MVT::i32;
1040     Info.ptrVal = I.getArgOperand(0);
1041     Info.offset = 0;
1042     Info.align = Align(4);
1043     // atomic.notify instruction does not really load the memory specified with
1044     // this argument, but MachineMemOperand should either be load or store, so
1045     // we set this to a load.
1046     // FIXME Volatile isn't really correct, but currently all LLVM atomic
1047     // instructions are treated as volatiles in the backend, so we should be
1048     // consistent. The same applies for wasm_atomic_wait intrinsics too.
1049     Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
1050     return true;
1051   case Intrinsic::wasm_memory_atomic_wait32:
1052     Info.opc = ISD::INTRINSIC_W_CHAIN;
1053     Info.memVT = MVT::i32;
1054     Info.ptrVal = I.getArgOperand(0);
1055     Info.offset = 0;
1056     Info.align = Align(4);
1057     Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
1058     return true;
1059   case Intrinsic::wasm_memory_atomic_wait64:
1060     Info.opc = ISD::INTRINSIC_W_CHAIN;
1061     Info.memVT = MVT::i64;
1062     Info.ptrVal = I.getArgOperand(0);
1063     Info.offset = 0;
1064     Info.align = Align(8);
1065     Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
1066     return true;
1067   case Intrinsic::wasm_loadf16_f32:
1068     Info.opc = ISD::INTRINSIC_W_CHAIN;
1069     Info.memVT = MVT::f16;
1070     Info.ptrVal = I.getArgOperand(0);
1071     Info.offset = 0;
1072     Info.align = Align(2);
1073     Info.flags = MachineMemOperand::MOLoad;
1074     return true;
1075   case Intrinsic::wasm_storef16_f32:
1076     Info.opc = ISD::INTRINSIC_VOID;
1077     Info.memVT = MVT::f16;
1078     Info.ptrVal = I.getArgOperand(1);
1079     Info.offset = 0;
1080     Info.align = Align(2);
1081     Info.flags = MachineMemOperand::MOStore;
1082     return true;
1083   default:
1084     return false;
1085   }
1086 }
1087 
void WebAssemblyTargetLowering::computeKnownBitsForTargetNode(
    const SDValue Op, KnownBits &Known, const APInt &DemandedElts,
    const SelectionDAG &DAG, unsigned Depth) const {
  switch (Op.getOpcode()) {
  default:
    break;
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntNo = Op.getConstantOperandVal(0);
    switch (IntNo) {
    default:
      break;
    case Intrinsic::wasm_bitmask: {
      // bitmask produces one result bit per input lane, so every bit at or
      // above the lane count is known to be zero.
      unsigned BitWidth = Known.getBitWidth();
      EVT VT = Op.getOperand(1).getSimpleValueType();
      unsigned PossibleBits = VT.getVectorNumElements();
      APInt ZeroMask = APInt::getHighBitsSet(BitWidth, BitWidth - PossibleBits);
      Known.Zero |= ZeroMask;
      break;
    }
    }
    break;
  }

  // For 128-bit addition if the upper bits are all zero then it's known that
  // the upper bits of the result will have all bits guaranteed zero except the
  // first.
  case WebAssemblyISD::I64_ADD128:
    if (Op.getResNo() == 1) {
      // Result 1 is the high half of the 128-bit sum; when both high inputs
      // (operands 1 and 3) are zero, the only bit that can be set is the
      // carry out of the low halves, i.e. bit 0.
      SDValue LHS_HI = Op.getOperand(1);
      SDValue RHS_HI = Op.getOperand(3);
      if (isNullConstant(LHS_HI) && isNullConstant(RHS_HI))
        Known.Zero.setBitsFrom(1);
    }
    break;
  }
}
1124 
1125 TargetLoweringBase::LegalizeTypeAction
1126 WebAssemblyTargetLowering::getPreferredVectorAction(MVT VT) const {
1127   if (VT.isFixedLengthVector()) {
1128     MVT EltVT = VT.getVectorElementType();
1129     // We have legal vector types with these lane types, so widening the
1130     // vector would let us use some of the lanes directly without having to
1131     // extend or truncate values.
1132     if (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
1133         EltVT == MVT::i64 || EltVT == MVT::f32 || EltVT == MVT::f64)
1134       return TypeWidenVector;
1135   }
1136 
1137   return TargetLoweringBase::getPreferredVectorAction(VT);
1138 }
1139 
1140 bool WebAssemblyTargetLowering::shouldSimplifyDemandedVectorElts(
1141     SDValue Op, const TargetLoweringOpt &TLO) const {
1142   // ISel process runs DAGCombiner after legalization; this step is called
1143   // SelectionDAG optimization phase. This post-legalization combining process
1144   // runs DAGCombiner on each node, and if there was a change to be made,
1145   // re-runs legalization again on it and its user nodes to make sure
1146   // everythiing is in a legalized state.
1147   //
1148   // The legalization calls lowering routines, and we do our custom lowering for
1149   // build_vectors (LowerBUILD_VECTOR), which converts undef vector elements
1150   // into zeros. But there is a set of routines in DAGCombiner that turns unused
1151   // (= not demanded) nodes into undef, among which SimplifyDemandedVectorElts
1152   // turns unused vector elements into undefs. But this routine does not work
1153   // with our custom LowerBUILD_VECTOR, which turns undefs into zeros. This
1154   // combination can result in a infinite loop, in which undefs are converted to
1155   // zeros in legalization and back to undefs in combining.
1156   //
1157   // So after DAG is legalized, we prevent SimplifyDemandedVectorElts from
1158   // running for build_vectors.
1159   if (Op.getOpcode() == ISD::BUILD_VECTOR && TLO.LegalOps && TLO.LegalTys)
1160     return false;
1161   return true;
1162 }
1163 
1164 //===----------------------------------------------------------------------===//
1165 // WebAssembly Lowering private implementation.
1166 //===----------------------------------------------------------------------===//
1167 
1168 //===----------------------------------------------------------------------===//
1169 // Lowering Code
1170 //===----------------------------------------------------------------------===//
1171 
1172 static void fail(const SDLoc &DL, SelectionDAG &DAG, const char *Msg) {
1173   MachineFunction &MF = DAG.getMachineFunction();
1174   DAG.getContext()->diagnose(
1175       DiagnosticInfoUnsupported(MF.getFunction(), Msg, DL.getDebugLoc()));
1176 }
1177 
1178 // Test whether the given calling convention is supported.
1179 static bool callingConvSupported(CallingConv::ID CallConv) {
1180   // We currently support the language-independent target-independent
1181   // conventions. We don't yet have a way to annotate calls with properties like
1182   // "cold", and we don't have any call-clobbered registers, so these are mostly
1183   // all handled the same.
1184   return CallConv == CallingConv::C || CallConv == CallingConv::Fast ||
1185          CallConv == CallingConv::Cold ||
1186          CallConv == CallingConv::PreserveMost ||
1187          CallConv == CallingConv::PreserveAll ||
1188          CallConv == CallingConv::CXX_FAST_TLS ||
1189          CallConv == CallingConv::WASM_EmscriptenInvoke ||
1190          CallConv == CallingConv::Swift;
1191 }
1192 
/// Lower an outgoing call: diagnose unsupported conventions and features,
/// legalize (or reject) tail calls, copy byval arguments to fresh stack
/// objects, materialize the vararg buffer, and finally emit a
/// WebAssemblyISD::CALL or RET_CALL node whose results are returned in InVals.
SDValue
WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
                                     SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc DL = CLI.DL;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  MachineFunction &MF = DAG.getMachineFunction();
  auto Layout = MF.getDataLayout();

  CallingConv::ID CallConv = CLI.CallConv;
  if (!callingConvSupported(CallConv))
    fail(DL, DAG,
         "WebAssembly doesn't support language-specific or target-specific "
         "calling conventions yet");
  if (CLI.IsPatchPoint)
    fail(DL, DAG, "WebAssembly doesn't support patch point yet");

  if (CLI.IsTailCall) {
    // Downgrade to a regular call when the tail call can't be lowered, but
    // diagnose an error if the source explicitly required musttail.
    auto NoTail = [&](const char *Msg) {
      if (CLI.CB && CLI.CB->isMustTailCall())
        fail(DL, DAG, Msg);
      CLI.IsTailCall = false;
    };

    if (!Subtarget->hasTailCall())
      NoTail("WebAssembly 'tail-call' feature not enabled");

    // Varargs calls cannot be tail calls because the buffer is on the stack
    if (CLI.IsVarArg)
      NoTail("WebAssembly does not support varargs tail calls");

    // Do not tail call unless caller and callee return types match
    const Function &F = MF.getFunction();
    const TargetMachine &TM = getTargetMachine();
    Type *RetTy = F.getReturnType();
    SmallVector<MVT, 4> CallerRetTys;
    SmallVector<MVT, 4> CalleeRetTys;
    computeLegalValueVTs(F, TM, RetTy, CallerRetTys);
    computeLegalValueVTs(F, TM, CLI.RetTy, CalleeRetTys);
    bool TypesMatch = CallerRetTys.size() == CalleeRetTys.size() &&
                      std::equal(CallerRetTys.begin(), CallerRetTys.end(),
                                 CalleeRetTys.begin());
    if (!TypesMatch)
      NoTail("WebAssembly tail call requires caller and callee return types to "
             "match");

    // If pointers to local stack values are passed, we cannot tail call
    if (CLI.CB) {
      for (auto &Arg : CLI.CB->args()) {
        Value *Val = Arg.get();
        // Trace the value back through pointer operations
        while (true) {
          Value *Src = Val->stripPointerCastsAndAliases();
          if (auto *GEP = dyn_cast<GetElementPtrInst>(Src))
            Src = GEP->getPointerOperand();
          if (Val == Src)
            break;
          Val = Src;
        }
        if (isa<AllocaInst>(Val)) {
          NoTail(
              "WebAssembly does not support tail calling with stack arguments");
          break;
        }
      }
    }
  }

  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;

  // The generic code may have added an sret argument. If we're lowering an
  // invoke function, the ABI requires that the function pointer be the first
  // argument, so we may have to swap the arguments.
  if (CallConv == CallingConv::WASM_EmscriptenInvoke && Outs.size() >= 2 &&
      Outs[0].Flags.isSRet()) {
    std::swap(Outs[0], Outs[1]);
    std::swap(OutVals[0], OutVals[1]);
  }

  // Scan the outgoing arguments: diagnose unimplemented argument kinds, copy
  // byval arguments into caller-owned stack objects, and count the fixed
  // (non-vararg) arguments.
  bool HasSwiftSelfArg = false;
  bool HasSwiftErrorArg = false;
  unsigned NumFixedArgs = 0;
  for (unsigned I = 0; I < Outs.size(); ++I) {
    const ISD::OutputArg &Out = Outs[I];
    SDValue &OutVal = OutVals[I];
    HasSwiftSelfArg |= Out.Flags.isSwiftSelf();
    HasSwiftErrorArg |= Out.Flags.isSwiftError();
    if (Out.Flags.isNest())
      fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
    if (Out.Flags.isInAlloca())
      fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
    if (Out.Flags.isInConsecutiveRegs())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
    if (Out.Flags.isInConsecutiveRegsLast())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
    if (Out.Flags.isByVal() && Out.Flags.getByValSize() != 0) {
      // byval: the callee must see its own copy, so memcpy the value into a
      // fresh stack object and pass the object's address instead.
      auto &MFI = MF.getFrameInfo();
      int FI = MFI.CreateStackObject(Out.Flags.getByValSize(),
                                     Out.Flags.getNonZeroByValAlign(),
                                     /*isSS=*/false);
      SDValue SizeNode =
          DAG.getConstant(Out.Flags.getByValSize(), DL, MVT::i32);
      SDValue FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
      Chain = DAG.getMemcpy(Chain, DL, FINode, OutVal, SizeNode,
                            Out.Flags.getNonZeroByValAlign(),
                            /*isVolatile*/ false, /*AlwaysInline=*/false,
                            /*CI=*/nullptr, std::nullopt, MachinePointerInfo(),
                            MachinePointerInfo());
      OutVal = FINode;
    }
    // Count the number of fixed args *after* legalization.
    NumFixedArgs += Out.IsFixed;
  }

  bool IsVarArg = CLI.IsVarArg;
  auto PtrVT = getPointerTy(Layout);

  // For swiftcc, emit additional swiftself and swifterror arguments
  // if there aren't. These additional arguments are also added for callee
  // signature. They are necessary to match callee and caller signature for
  // indirect call.
  if (CallConv == CallingConv::Swift) {
    if (!HasSwiftSelfArg) {
      NumFixedArgs++;
      ISD::OutputArg Arg;
      Arg.Flags.setSwiftSelf();
      CLI.Outs.push_back(Arg);
      SDValue ArgVal = DAG.getUNDEF(PtrVT);
      CLI.OutVals.push_back(ArgVal);
    }
    if (!HasSwiftErrorArg) {
      NumFixedArgs++;
      ISD::OutputArg Arg;
      Arg.Flags.setSwiftError();
      CLI.Outs.push_back(Arg);
      SDValue ArgVal = DAG.getUNDEF(PtrVT);
      CLI.OutVals.push_back(ArgVal);
    }
  }

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  if (IsVarArg) {
    // Outgoing non-fixed arguments are placed in a buffer. First
    // compute their offsets and the total amount of buffer space needed.
    for (unsigned I = NumFixedArgs; I < Outs.size(); ++I) {
      const ISD::OutputArg &Out = Outs[I];
      SDValue &Arg = OutVals[I];
      EVT VT = Arg.getValueType();
      assert(VT != MVT::iPTR && "Legalized args should be concrete");
      Type *Ty = VT.getTypeForEVT(*DAG.getContext());
      Align Alignment =
          std::max(Out.Flags.getNonZeroOrigAlign(), Layout.getABITypeAlign(Ty));
      unsigned Offset =
          CCInfo.AllocateStack(Layout.getTypeAllocSize(Ty), Alignment);
      CCInfo.addLoc(CCValAssign::getMem(ArgLocs.size(), VT.getSimpleVT(),
                                        Offset, VT.getSimpleVT(),
                                        CCValAssign::Full));
    }
  }

  unsigned NumBytes = CCInfo.getAlignedCallFrameSize();

  SDValue FINode;
  if (IsVarArg && NumBytes) {
    // For non-fixed arguments, next emit stores to store the argument values
    // to the stack buffer at the offsets computed above.
    MaybeAlign StackAlign = Layout.getStackAlignment();
    assert(StackAlign && "data layout string is missing stack alignment");
    int FI = MF.getFrameInfo().CreateStackObject(NumBytes, *StackAlign,
                                                 /*isSS=*/false);
    unsigned ValNo = 0;
    SmallVector<SDValue, 8> Chains;
    for (SDValue Arg : drop_begin(OutVals, NumFixedArgs)) {
      assert(ArgLocs[ValNo].getValNo() == ValNo &&
             "ArgLocs should remain in order and only hold varargs args");
      unsigned Offset = ArgLocs[ValNo++].getLocMemOffset();
      FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
      SDValue Add = DAG.getNode(ISD::ADD, DL, PtrVT, FINode,
                                DAG.getConstant(Offset, DL, PtrVT));
      Chains.push_back(
          DAG.getStore(Chain, DL, Arg, Add,
                       MachinePointerInfo::getFixedStack(MF, FI, Offset)));
    }
    if (!Chains.empty())
      Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
  } else if (IsVarArg) {
    // No varargs were passed; the callee still expects a buffer pointer, so
    // pass a null one.
    FINode = DAG.getIntPtrConstant(0, DL);
  }

  if (Callee->getOpcode() == ISD::GlobalAddress) {
    // If the callee is a GlobalAddress node (quite common, every direct call
    // is) turn it into a TargetGlobalAddress node so that LowerGlobalAddress
    // doesn't add MO_GOT, which is not needed for direct calls.
    GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Callee);
    Callee = DAG.getTargetGlobalAddress(GA->getGlobal(), DL,
                                        getPointerTy(DAG.getDataLayout()),
                                        GA->getOffset());
    Callee = DAG.getNode(WebAssemblyISD::Wrapper, DL,
                         getPointerTy(DAG.getDataLayout()), Callee);
  }

  // Compute the operands for the CALLn node.
  SmallVector<SDValue, 16> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add all fixed arguments. Note that for non-varargs calls, NumFixedArgs
  // isn't reliable.
  Ops.append(OutVals.begin(),
             IsVarArg ? OutVals.begin() + NumFixedArgs : OutVals.end());
  // Add a pointer to the vararg buffer.
  if (IsVarArg)
    Ops.push_back(FINode);

  // Collect the result types, diagnosing unimplemented return-value kinds.
  SmallVector<EVT, 8> InTys;
  for (const auto &In : Ins) {
    assert(!In.Flags.isByVal() && "byval is not valid for return values");
    assert(!In.Flags.isNest() && "nest is not valid for return values");
    if (In.Flags.isInAlloca())
      fail(DL, DAG, "WebAssembly hasn't implemented inalloca return values");
    if (In.Flags.isInConsecutiveRegs())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs return values");
    if (In.Flags.isInConsecutiveRegsLast())
      fail(DL, DAG,
           "WebAssembly hasn't implemented cons regs last return values");
    // Ignore In.getNonZeroOrigAlign() because all our arguments are passed in
    // registers.
    InTys.push_back(In.VT);
  }

  // Lastly, if this is a call to a funcref we need to add an instruction
  // table.set to the chain and transform the call.
  if (CLI.CB && WebAssembly::isWebAssemblyFuncrefType(
                    CLI.CB->getCalledOperand()->getType())) {
    // In the absence of function references proposal where a funcref call is
    // lowered to call_ref, using reference types we generate a table.set to set
    // the funcref to a special table used solely for this purpose, followed by
    // a call_indirect. Here we just generate the table set, and return the
    // SDValue of the table.set so that LowerCall can finalize the lowering by
    // generating the call_indirect.
    SDValue Chain = Ops[0];

    MCSymbolWasm *Table = WebAssembly::getOrCreateFuncrefCallTableSymbol(
        MF.getContext(), Subtarget);
    SDValue Sym = DAG.getMCSymbol(Table, PtrVT);
    SDValue TableSlot = DAG.getConstant(0, DL, MVT::i32);
    SDValue TableSetOps[] = {Chain, Sym, TableSlot, Callee};
    SDValue TableSet = DAG.getMemIntrinsicNode(
        WebAssemblyISD::TABLE_SET, DL, DAG.getVTList(MVT::Other), TableSetOps,
        MVT::funcref,
        // Machine Mem Operand args
        MachinePointerInfo(
            WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_FUNCREF),
        CLI.CB->getCalledOperand()->getPointerAlignment(DAG.getDataLayout()),
        MachineMemOperand::MOStore);

    Ops[0] = TableSet; // The new chain is the TableSet itself
  }

  if (CLI.IsTailCall) {
    // ret_calls do not return values to the current frame
    SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
    return DAG.getNode(WebAssemblyISD::RET_CALL, DL, NodeTys, Ops);
  }

  InTys.push_back(MVT::Other);
  SDVTList InTyList = DAG.getVTList(InTys);
  SDValue Res = DAG.getNode(WebAssemblyISD::CALL, DL, InTyList, Ops);

  // The first Ins.size() values of the CALL node are the call's results.
  for (size_t I = 0; I < Ins.size(); ++I)
    InVals.push_back(Res.getValue(I));

  // Return the chain
  return Res.getValue(Ins.size());
}
1474 
1475 bool WebAssemblyTargetLowering::CanLowerReturn(
1476     CallingConv::ID /*CallConv*/, MachineFunction & /*MF*/, bool /*IsVarArg*/,
1477     const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext & /*Context*/,
1478     const Type *RetTy) const {
1479   // WebAssembly can only handle returning tuples with multivalue enabled
1480   return WebAssembly::canLowerReturn(Outs.size(), Subtarget);
1481 }
1482 
1483 SDValue WebAssemblyTargetLowering::LowerReturn(
1484     SDValue Chain, CallingConv::ID CallConv, bool /*IsVarArg*/,
1485     const SmallVectorImpl<ISD::OutputArg> &Outs,
1486     const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
1487     SelectionDAG &DAG) const {
1488   assert(WebAssembly::canLowerReturn(Outs.size(), Subtarget) &&
1489          "MVP WebAssembly can only return up to one value");
1490   if (!callingConvSupported(CallConv))
1491     fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");
1492 
1493   SmallVector<SDValue, 4> RetOps(1, Chain);
1494   RetOps.append(OutVals.begin(), OutVals.end());
1495   Chain = DAG.getNode(WebAssemblyISD::RETURN, DL, MVT::Other, RetOps);
1496 
1497   // Record the number and types of the return values.
1498   for (const ISD::OutputArg &Out : Outs) {
1499     assert(!Out.Flags.isByVal() && "byval is not valid for return values");
1500     assert(!Out.Flags.isNest() && "nest is not valid for return values");
1501     assert(Out.IsFixed && "non-fixed return value is not valid");
1502     if (Out.Flags.isInAlloca())
1503       fail(DL, DAG, "WebAssembly hasn't implemented inalloca results");
1504     if (Out.Flags.isInConsecutiveRegs())
1505       fail(DL, DAG, "WebAssembly hasn't implemented cons regs results");
1506     if (Out.Flags.isInConsecutiveRegsLast())
1507       fail(DL, DAG, "WebAssembly hasn't implemented cons regs last results");
1508   }
1509 
1510   return Chain;
1511 }
1512 
// Lowers incoming formal arguments: each used argument becomes an ARGUMENT
// node indexed by its position; the types are recorded in the function info
// so the final wasm signature can be emitted.
SDValue WebAssemblyTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  if (!callingConvSupported(CallConv))
    fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");

  MachineFunction &MF = DAG.getMachineFunction();
  auto *MFI = MF.getInfo<WebAssemblyFunctionInfo>();

  // Set up the incoming ARGUMENTS value, which serves to represent the liveness
  // of the incoming values before they're represented by virtual registers.
  MF.getRegInfo().addLiveIn(WebAssembly::ARGUMENTS);

  bool HasSwiftErrorArg = false;
  bool HasSwiftSelfArg = false;
  for (const ISD::InputArg &In : Ins) {
    HasSwiftSelfArg |= In.Flags.isSwiftSelf();
    HasSwiftErrorArg |= In.Flags.isSwiftError();
    if (In.Flags.isInAlloca())
      fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
    if (In.Flags.isNest())
      fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
    if (In.Flags.isInConsecutiveRegs())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
    if (In.Flags.isInConsecutiveRegsLast())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
    // Ignore In.getNonZeroOrigAlign() because all our arguments are passed in
    // registers.
    // Note: InVals.size() is evaluated before the push_back, so it is the
    // index of the argument currently being lowered. Unused arguments are
    // represented as UNDEF rather than materializing an ARGUMENT node.
    InVals.push_back(In.Used ? DAG.getNode(WebAssemblyISD::ARGUMENT, DL, In.VT,
                                           DAG.getTargetConstant(InVals.size(),
                                                                 DL, MVT::i32))
                             : DAG.getUNDEF(In.VT));

    // Record the number and types of arguments.
    MFI->addParam(In.VT);
  }

  // For swiftcc, emit additional swiftself and swifterror arguments
  // if there aren't. These additional arguments are also added for callee
  // signature They are necessary to match callee and caller signature for
  // indirect call.
  auto PtrVT = getPointerTy(MF.getDataLayout());
  if (CallConv == CallingConv::Swift) {
    if (!HasSwiftSelfArg) {
      MFI->addParam(PtrVT);
    }
    if (!HasSwiftErrorArg) {
      MFI->addParam(PtrVT);
    }
  }
  // Varargs are copied into a buffer allocated by the caller, and a pointer to
  // the buffer is passed as an argument.
  if (IsVarArg) {
    MVT PtrVT = getPointerTy(MF.getDataLayout());
    Register VarargVreg =
        MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrVT));
    MFI->setVarargBufferVreg(VarargVreg);
    // The vararg buffer pointer is the trailing implicit argument
    // (index Ins.size()); LowerVASTART reads it back out of this vreg.
    Chain = DAG.getCopyToReg(
        Chain, DL, VarargVreg,
        DAG.getNode(WebAssemblyISD::ARGUMENT, DL, PtrVT,
                    DAG.getTargetConstant(Ins.size(), DL, MVT::i32)));
    MFI->addParam(PtrVT);
  }

  // Record the number and types of arguments and results.
  SmallVector<MVT, 4> Params;
  SmallVector<MVT, 4> Results;
  computeSignatureVTs(MF.getFunction().getFunctionType(), &MF.getFunction(),
                      MF.getFunction(), DAG.getTarget(), Params, Results);
  for (MVT VT : Results)
    MFI->addResult(VT);
  // TODO: Use signatures in WebAssemblyMachineFunctionInfo too and unify
  // the param logic here with ComputeSignatureVTs
  assert(MFI->getParams().size() == Params.size() &&
         std::equal(MFI->getParams().begin(), MFI->getParams().end(),
                    Params.begin()));

  return Chain;
}
1593 
1594 void WebAssemblyTargetLowering::ReplaceNodeResults(
1595     SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
1596   switch (N->getOpcode()) {
1597   case ISD::SIGN_EXTEND_INREG:
1598     // Do not add any results, signifying that N should not be custom lowered
1599     // after all. This happens because simd128 turns on custom lowering for
1600     // SIGN_EXTEND_INREG, but for non-vector sign extends the result might be an
1601     // illegal type.
1602     break;
1603   case ISD::SIGN_EXTEND_VECTOR_INREG:
1604   case ISD::ZERO_EXTEND_VECTOR_INREG:
1605     // Do not add any results, signifying that N should not be custom lowered.
1606     // EXTEND_VECTOR_INREG is implemented for some vectors, but not all.
1607     break;
1608   case ISD::ADD:
1609   case ISD::SUB:
1610     Results.push_back(Replace128Op(N, DAG));
1611     break;
1612   default:
1613     llvm_unreachable(
1614         "ReplaceNodeResults not implemented for this op for WebAssembly!");
1615   }
1616 }
1617 
1618 //===----------------------------------------------------------------------===//
1619 //  Custom lowering hooks.
1620 //===----------------------------------------------------------------------===//
1621 
// Central dispatch for all custom-lowered operations: each case delegates to
// a dedicated Lower* helper. Any opcode reaching the default case was marked
// Custom without a lowering being implemented.
SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op,
                                                  SelectionDAG &DAG) const {
  SDLoc DL(Op);
  switch (Op.getOpcode()) {
  default:
    llvm_unreachable("unimplemented operation lowering");
    return SDValue();
  case ISD::FrameIndex:
    return LowerFrameIndex(Op, DAG);
  case ISD::GlobalAddress:
    return LowerGlobalAddress(Op, DAG);
  case ISD::GlobalTLSAddress:
    return LowerGlobalTLSAddress(Op, DAG);
  case ISD::ExternalSymbol:
    return LowerExternalSymbol(Op, DAG);
  case ISD::JumpTable:
    return LowerJumpTable(Op, DAG);
  case ISD::BR_JT:
    return LowerBR_JT(Op, DAG);
  case ISD::VASTART:
    return LowerVASTART(Op, DAG);
  case ISD::BlockAddress:
  case ISD::BRIND:
    // Indirect branches require taking the address of a basic block, which
    // WebAssembly's structured control flow cannot express.
    fail(DL, DAG, "WebAssembly hasn't implemented computed gotos");
    return SDValue();
  case ISD::RETURNADDR:
    return LowerRETURNADDR(Op, DAG);
  case ISD::FRAMEADDR:
    return LowerFRAMEADDR(Op, DAG);
  case ISD::CopyToReg:
    return LowerCopyToReg(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
  case ISD::INSERT_VECTOR_ELT:
    return LowerAccessVectorElement(Op, DAG);
  case ISD::INTRINSIC_VOID:
  case ISD::INTRINSIC_WO_CHAIN:
  case ISD::INTRINSIC_W_CHAIN:
    return LowerIntrinsic(Op, DAG);
  case ISD::SIGN_EXTEND_INREG:
    return LowerSIGN_EXTEND_INREG(Op, DAG);
  case ISD::ZERO_EXTEND_VECTOR_INREG:
  case ISD::SIGN_EXTEND_VECTOR_INREG:
    return LowerEXTEND_VECTOR_INREG(Op, DAG);
  case ISD::BUILD_VECTOR:
    return LowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:
    return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::SETCC:
    return LowerSETCC(Op, DAG);
  case ISD::SHL:
  case ISD::SRA:
  case ISD::SRL:
    return LowerShift(Op, DAG);
  case ISD::FP_TO_SINT_SAT:
  case ISD::FP_TO_UINT_SAT:
    return LowerFP_TO_INT_SAT(Op, DAG);
  case ISD::LOAD:
    return LowerLoad(Op, DAG);
  case ISD::STORE:
    return LowerStore(Op, DAG);
  case ISD::CTPOP:
  case ISD::CTLZ:
  case ISD::CTTZ:
    // Scalarize vector bit-counting ops that have no SIMD equivalent.
    return DAG.UnrollVectorOp(Op.getNode());
  case ISD::CLEAR_CACHE:
    report_fatal_error("llvm.clear_cache is not supported on wasm");
  case ISD::SMUL_LOHI:
  case ISD::UMUL_LOHI:
    return LowerMUL_LOHI(Op, DAG);
  case ISD::UADDO:
    return LowerUADDO(Op, DAG);
  }
}
1695 
1696 static bool IsWebAssemblyGlobal(SDValue Op) {
1697   if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op))
1698     return WebAssembly::isWasmVarAddressSpace(GA->getAddressSpace());
1699 
1700   return false;
1701 }
1702 
1703 static std::optional<unsigned> IsWebAssemblyLocal(SDValue Op,
1704                                                   SelectionDAG &DAG) {
1705   const FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op);
1706   if (!FI)
1707     return std::nullopt;
1708 
1709   auto &MF = DAG.getMachineFunction();
1710   return WebAssemblyFrameLowering::getLocalForStackObject(MF, FI->getIndex());
1711 }
1712 
1713 SDValue WebAssemblyTargetLowering::LowerStore(SDValue Op,
1714                                               SelectionDAG &DAG) const {
1715   SDLoc DL(Op);
1716   StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
1717   const SDValue &Value = SN->getValue();
1718   const SDValue &Base = SN->getBasePtr();
1719   const SDValue &Offset = SN->getOffset();
1720 
1721   if (IsWebAssemblyGlobal(Base)) {
1722     if (!Offset->isUndef())
1723       report_fatal_error("unexpected offset when storing to webassembly global",
1724                          false);
1725 
1726     SDVTList Tys = DAG.getVTList(MVT::Other);
1727     SDValue Ops[] = {SN->getChain(), Value, Base};
1728     return DAG.getMemIntrinsicNode(WebAssemblyISD::GLOBAL_SET, DL, Tys, Ops,
1729                                    SN->getMemoryVT(), SN->getMemOperand());
1730   }
1731 
1732   if (std::optional<unsigned> Local = IsWebAssemblyLocal(Base, DAG)) {
1733     if (!Offset->isUndef())
1734       report_fatal_error("unexpected offset when storing to webassembly local",
1735                          false);
1736 
1737     SDValue Idx = DAG.getTargetConstant(*Local, Base, MVT::i32);
1738     SDVTList Tys = DAG.getVTList(MVT::Other); // The chain.
1739     SDValue Ops[] = {SN->getChain(), Idx, Value};
1740     return DAG.getNode(WebAssemblyISD::LOCAL_SET, DL, Tys, Ops);
1741   }
1742 
1743   if (WebAssembly::isWasmVarAddressSpace(SN->getAddressSpace()))
1744     report_fatal_error(
1745         "Encountered an unlowerable store to the wasm_var address space",
1746         false);
1747 
1748   return Op;
1749 }
1750 
1751 SDValue WebAssemblyTargetLowering::LowerLoad(SDValue Op,
1752                                              SelectionDAG &DAG) const {
1753   SDLoc DL(Op);
1754   LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
1755   const SDValue &Base = LN->getBasePtr();
1756   const SDValue &Offset = LN->getOffset();
1757 
1758   if (IsWebAssemblyGlobal(Base)) {
1759     if (!Offset->isUndef())
1760       report_fatal_error(
1761           "unexpected offset when loading from webassembly global", false);
1762 
1763     SDVTList Tys = DAG.getVTList(LN->getValueType(0), MVT::Other);
1764     SDValue Ops[] = {LN->getChain(), Base};
1765     return DAG.getMemIntrinsicNode(WebAssemblyISD::GLOBAL_GET, DL, Tys, Ops,
1766                                    LN->getMemoryVT(), LN->getMemOperand());
1767   }
1768 
1769   if (std::optional<unsigned> Local = IsWebAssemblyLocal(Base, DAG)) {
1770     if (!Offset->isUndef())
1771       report_fatal_error(
1772           "unexpected offset when loading from webassembly local", false);
1773 
1774     SDValue Idx = DAG.getTargetConstant(*Local, Base, MVT::i32);
1775     EVT LocalVT = LN->getValueType(0);
1776     SDValue LocalGet = DAG.getNode(WebAssemblyISD::LOCAL_GET, DL, LocalVT,
1777                                    {LN->getChain(), Idx});
1778     SDValue Result = DAG.getMergeValues({LocalGet, LN->getChain()}, DL);
1779     assert(Result->getNumValues() == 2 && "Loads must carry a chain!");
1780     return Result;
1781   }
1782 
1783   if (WebAssembly::isWasmVarAddressSpace(LN->getAddressSpace()))
1784     report_fatal_error(
1785         "Encountered an unlowerable load from the wasm_var address space",
1786         false);
1787 
1788   return Op;
1789 }
1790 
1791 SDValue WebAssemblyTargetLowering::LowerMUL_LOHI(SDValue Op,
1792                                                  SelectionDAG &DAG) const {
1793   assert(Subtarget->hasWideArithmetic());
1794   assert(Op.getValueType() == MVT::i64);
1795   SDLoc DL(Op);
1796   unsigned Opcode;
1797   switch (Op.getOpcode()) {
1798   case ISD::UMUL_LOHI:
1799     Opcode = WebAssemblyISD::I64_MUL_WIDE_U;
1800     break;
1801   case ISD::SMUL_LOHI:
1802     Opcode = WebAssemblyISD::I64_MUL_WIDE_S;
1803     break;
1804   default:
1805     llvm_unreachable("unexpected opcode");
1806   }
1807   SDValue LHS = Op.getOperand(0);
1808   SDValue RHS = Op.getOperand(1);
1809   SDValue Lo =
1810       DAG.getNode(Opcode, DL, DAG.getVTList(MVT::i64, MVT::i64), LHS, RHS);
1811   SDValue Hi(Lo.getNode(), 1);
1812   SDValue Ops[] = {Lo, Hi};
1813   return DAG.getMergeValues(Ops, DL);
1814 }
1815 
1816 // Lowers `UADDO` intrinsics to an `i64.add128` instruction when it's enabled.
1817 //
1818 // This enables generating a single wasm instruction for this operation where
1819 // the upper half of both operands are constant zeros. The upper half of the
1820 // result is then whether the overflow happened.
1821 SDValue WebAssemblyTargetLowering::LowerUADDO(SDValue Op,
1822                                               SelectionDAG &DAG) const {
1823   assert(Subtarget->hasWideArithmetic());
1824   assert(Op.getValueType() == MVT::i64);
1825   assert(Op.getOpcode() == ISD::UADDO);
1826   SDLoc DL(Op);
1827   SDValue LHS = Op.getOperand(0);
1828   SDValue RHS = Op.getOperand(1);
1829   SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
1830   SDValue Result =
1831       DAG.getNode(WebAssemblyISD::I64_ADD128, DL,
1832                   DAG.getVTList(MVT::i64, MVT::i64), LHS, Zero, RHS, Zero);
1833   SDValue CarryI64(Result.getNode(), 1);
1834   SDValue CarryI32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, CarryI64);
1835   SDValue Ops[] = {Result, CarryI32};
1836   return DAG.getMergeValues(Ops, DL);
1837 }
1838 
1839 SDValue WebAssemblyTargetLowering::Replace128Op(SDNode *N,
1840                                                 SelectionDAG &DAG) const {
1841   assert(Subtarget->hasWideArithmetic());
1842   assert(N->getValueType(0) == MVT::i128);
1843   SDLoc DL(N);
1844   unsigned Opcode;
1845   switch (N->getOpcode()) {
1846   case ISD::ADD:
1847     Opcode = WebAssemblyISD::I64_ADD128;
1848     break;
1849   case ISD::SUB:
1850     Opcode = WebAssemblyISD::I64_SUB128;
1851     break;
1852   default:
1853     llvm_unreachable("unexpected opcode");
1854   }
1855   SDValue LHS = N->getOperand(0);
1856   SDValue RHS = N->getOperand(1);
1857 
1858   SDValue C0 = DAG.getConstant(0, DL, MVT::i64);
1859   SDValue C1 = DAG.getConstant(1, DL, MVT::i64);
1860   SDValue LHS_0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, LHS, C0);
1861   SDValue LHS_1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, LHS, C1);
1862   SDValue RHS_0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, RHS, C0);
1863   SDValue RHS_1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, RHS, C1);
1864   SDValue Result_LO = DAG.getNode(Opcode, DL, DAG.getVTList(MVT::i64, MVT::i64),
1865                                   LHS_0, LHS_1, RHS_0, RHS_1);
1866   SDValue Result_HI(Result_LO.getNode(), 1);
1867   return DAG.getNode(ISD::BUILD_PAIR, DL, N->getVTList(), Result_LO, Result_HI);
1868 }
1869 
1870 SDValue WebAssemblyTargetLowering::LowerCopyToReg(SDValue Op,
1871                                                   SelectionDAG &DAG) const {
1872   SDValue Src = Op.getOperand(2);
1873   if (isa<FrameIndexSDNode>(Src.getNode())) {
1874     // CopyToReg nodes don't support FrameIndex operands. Other targets select
1875     // the FI to some LEA-like instruction, but since we don't have that, we
1876     // need to insert some kind of instruction that can take an FI operand and
1877     // produces a value usable by CopyToReg (i.e. in a vreg). So insert a dummy
1878     // local.copy between Op and its FI operand.
1879     SDValue Chain = Op.getOperand(0);
1880     SDLoc DL(Op);
1881     Register Reg = cast<RegisterSDNode>(Op.getOperand(1))->getReg();
1882     EVT VT = Src.getValueType();
1883     SDValue Copy(DAG.getMachineNode(VT == MVT::i32 ? WebAssembly::COPY_I32
1884                                                    : WebAssembly::COPY_I64,
1885                                     DL, VT, Src),
1886                  0);
1887     return Op.getNode()->getNumValues() == 1
1888                ? DAG.getCopyToReg(Chain, DL, Reg, Copy)
1889                : DAG.getCopyToReg(Chain, DL, Reg, Copy,
1890                                   Op.getNumOperands() == 4 ? Op.getOperand(3)
1891                                                            : SDValue());
1892   }
1893   return SDValue();
1894 }
1895 
1896 SDValue WebAssemblyTargetLowering::LowerFrameIndex(SDValue Op,
1897                                                    SelectionDAG &DAG) const {
1898   int FI = cast<FrameIndexSDNode>(Op)->getIndex();
1899   return DAG.getTargetFrameIndex(FI, Op.getValueType());
1900 }
1901 
1902 SDValue WebAssemblyTargetLowering::LowerRETURNADDR(SDValue Op,
1903                                                    SelectionDAG &DAG) const {
1904   SDLoc DL(Op);
1905 
1906   if (!Subtarget->getTargetTriple().isOSEmscripten()) {
1907     fail(DL, DAG,
1908          "Non-Emscripten WebAssembly hasn't implemented "
1909          "__builtin_return_address");
1910     return SDValue();
1911   }
1912 
1913   unsigned Depth = Op.getConstantOperandVal(0);
1914   MakeLibCallOptions CallOptions;
1915   return makeLibCall(DAG, RTLIB::RETURN_ADDRESS, Op.getValueType(),
1916                      {DAG.getConstant(Depth, DL, MVT::i32)}, CallOptions, DL)
1917       .first;
1918 }
1919 
1920 SDValue WebAssemblyTargetLowering::LowerFRAMEADDR(SDValue Op,
1921                                                   SelectionDAG &DAG) const {
1922   // Non-zero depths are not supported by WebAssembly currently. Use the
1923   // legalizer's default expansion, which is to return 0 (what this function is
1924   // documented to do).
1925   if (Op.getConstantOperandVal(0) > 0)
1926     return SDValue();
1927 
1928   DAG.getMachineFunction().getFrameInfo().setFrameAddressIsTaken(true);
1929   EVT VT = Op.getValueType();
1930   Register FP =
1931       Subtarget->getRegisterInfo()->getFrameRegister(DAG.getMachineFunction());
1932   return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), FP, VT);
1933 }
1934 
// Lowers a thread-local global address. DSO-local TLS variables are computed
// as __tls_base + a TLS-relative offset; everything else goes through a
// GOT.TLS entry. Requires the bulk-memory feature.
SDValue
WebAssemblyTargetLowering::LowerGlobalTLSAddress(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc DL(Op);
  const auto *GA = cast<GlobalAddressSDNode>(Op);

  MachineFunction &MF = DAG.getMachineFunction();
  if (!MF.getSubtarget<WebAssemblySubtarget>().hasBulkMemory())
    report_fatal_error("cannot use thread-local storage without bulk memory",
                       false);

  const GlobalValue *GV = GA->getGlobal();

  // Currently only Emscripten supports dynamic linking with threads. Therefore,
  // on other targets, if we have thread-local storage, only the local-exec
  // model is possible.
  auto model = Subtarget->getTargetTriple().isOSEmscripten()
                   ? GV->getThreadLocalMode()
                   : GlobalValue::LocalExecTLSModel;

  // Unsupported TLS modes
  assert(model != GlobalValue::NotThreadLocal);
  assert(model != GlobalValue::InitialExecTLSModel);

  if (model == GlobalValue::LocalExecTLSModel ||
      model == GlobalValue::LocalDynamicTLSModel ||
      (model == GlobalValue::GeneralDynamicTLSModel &&
       getTargetMachine().shouldAssumeDSOLocal(GV))) {
    // For DSO-local TLS variables we use offset from __tls_base

    MVT PtrVT = getPointerTy(DAG.getDataLayout());
    auto GlobalGet = PtrVT == MVT::i64 ? WebAssembly::GLOBAL_GET_I64
                                       : WebAssembly::GLOBAL_GET_I32;
    const char *BaseName = MF.createExternalSymbolName("__tls_base");

    // Read this thread's TLS base out of the __tls_base wasm global.
    SDValue BaseAddr(
        DAG.getMachineNode(GlobalGet, DL, PtrVT,
                           DAG.getTargetExternalSymbol(BaseName, PtrVT)),
        0);

    // The symbol's offset relative to __tls_base, via a WrapperREL node.
    SDValue TLSOffset = DAG.getTargetGlobalAddress(
        GV, DL, PtrVT, GA->getOffset(), WebAssemblyII::MO_TLS_BASE_REL);
    SDValue SymOffset =
        DAG.getNode(WebAssemblyISD::WrapperREL, DL, PtrVT, TLSOffset);

    return DAG.getNode(ISD::ADD, DL, PtrVT, BaseAddr, SymOffset);
  }

  assert(model == GlobalValue::GeneralDynamicTLSModel);

  // Non-DSO-local general-dynamic TLS: fetch the address via its GOT.TLS
  // entry (MO_GOT_TLS).
  EVT VT = Op.getValueType();
  return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
                     DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT,
                                                GA->getOffset(),
                                                WebAssemblyII::MO_GOT_TLS));
}
1991 
// Lowers a global address. Non-PIC (and table) globals are simply wrapped;
// PIC DSO-local globals become __table_base/__memory_base plus a relative
// offset, and other PIC globals are resolved through the GOT.
SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op,
                                                      SelectionDAG &DAG) const {
  SDLoc DL(Op);
  const auto *GA = cast<GlobalAddressSDNode>(Op);
  EVT VT = Op.getValueType();
  assert(GA->getTargetFlags() == 0 &&
         "Unexpected target flags on generic GlobalAddressSDNode");
  if (!WebAssembly::isValidAddressSpace(GA->getAddressSpace()))
    fail(DL, DAG, "Invalid address space for WebAssembly target");

  unsigned OperandFlags = 0;
  const GlobalValue *GV = GA->getGlobal();
  // Since WebAssembly tables cannot yet be shared across modules, we don't
  // need special treatment for tables in PIC mode.
  if (isPositionIndependent() &&
      !WebAssembly::isWebAssemblyTableType(GV->getValueType())) {
    if (getTargetMachine().shouldAssumeDSOLocal(GV)) {
      MachineFunction &MF = DAG.getMachineFunction();
      MVT PtrVT = getPointerTy(MF.getDataLayout());
      const char *BaseName;
      // Functions are addressed relative to __table_base (they live in the
      // indirect-function table); data is relative to __memory_base.
      if (GV->getValueType()->isFunctionTy()) {
        BaseName = MF.createExternalSymbolName("__table_base");
        OperandFlags = WebAssemblyII::MO_TABLE_BASE_REL;
      } else {
        BaseName = MF.createExternalSymbolName("__memory_base");
        OperandFlags = WebAssemblyII::MO_MEMORY_BASE_REL;
      }
      SDValue BaseAddr =
          DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
                      DAG.getTargetExternalSymbol(BaseName, PtrVT));

      // Address = base + base-relative symbol offset.
      SDValue SymAddr = DAG.getNode(
          WebAssemblyISD::WrapperREL, DL, VT,
          DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, GA->getOffset(),
                                     OperandFlags));

      return DAG.getNode(ISD::ADD, DL, VT, BaseAddr, SymAddr);
    }
    // Non-DSO-local PIC references go through the GOT.
    OperandFlags = WebAssemblyII::MO_GOT;
  }

  return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
                     DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT,
                                                GA->getOffset(), OperandFlags));
}
2037 
2038 SDValue
2039 WebAssemblyTargetLowering::LowerExternalSymbol(SDValue Op,
2040                                                SelectionDAG &DAG) const {
2041   SDLoc DL(Op);
2042   const auto *ES = cast<ExternalSymbolSDNode>(Op);
2043   EVT VT = Op.getValueType();
2044   assert(ES->getTargetFlags() == 0 &&
2045          "Unexpected target flags on generic ExternalSymbolSDNode");
2046   return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
2047                      DAG.getTargetExternalSymbol(ES->getSymbol(), VT));
2048 }
2049 
2050 SDValue WebAssemblyTargetLowering::LowerJumpTable(SDValue Op,
2051                                                   SelectionDAG &DAG) const {
2052   // There's no need for a Wrapper node because we always incorporate a jump
2053   // table operand into a BR_TABLE instruction, rather than ever
2054   // materializing it in a register.
2055   const JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
2056   return DAG.getTargetJumpTable(JT->getIndex(), Op.getValueType(),
2057                                 JT->getTargetFlags());
2058 }
2059 
2060 SDValue WebAssemblyTargetLowering::LowerBR_JT(SDValue Op,
2061                                               SelectionDAG &DAG) const {
2062   SDLoc DL(Op);
2063   SDValue Chain = Op.getOperand(0);
2064   const auto *JT = cast<JumpTableSDNode>(Op.getOperand(1));
2065   SDValue Index = Op.getOperand(2);
2066   assert(JT->getTargetFlags() == 0 && "WebAssembly doesn't set target flags");
2067 
2068   SmallVector<SDValue, 8> Ops;
2069   Ops.push_back(Chain);
2070   Ops.push_back(Index);
2071 
2072   MachineJumpTableInfo *MJTI = DAG.getMachineFunction().getJumpTableInfo();
2073   const auto &MBBs = MJTI->getJumpTables()[JT->getIndex()].MBBs;
2074 
2075   // Add an operand for each case.
2076   for (auto *MBB : MBBs)
2077     Ops.push_back(DAG.getBasicBlock(MBB));
2078 
2079   // Add the first MBB as a dummy default target for now. This will be replaced
2080   // with the proper default target (and the preceding range check eliminated)
2081   // if possible by WebAssemblyFixBrTableDefaults.
2082   Ops.push_back(DAG.getBasicBlock(*MBBs.begin()));
2083   return DAG.getNode(WebAssemblyISD::BR_TABLE, DL, MVT::Other, Ops);
2084 }
2085 
2086 SDValue WebAssemblyTargetLowering::LowerVASTART(SDValue Op,
2087                                                 SelectionDAG &DAG) const {
2088   SDLoc DL(Op);
2089   EVT PtrVT = getPointerTy(DAG.getMachineFunction().getDataLayout());
2090 
2091   auto *MFI = DAG.getMachineFunction().getInfo<WebAssemblyFunctionInfo>();
2092   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
2093 
2094   SDValue ArgN = DAG.getCopyFromReg(DAG.getEntryNode(), DL,
2095                                     MFI->getVarargBufferVreg(), PtrVT);
2096   return DAG.getStore(Op.getOperand(0), DL, ArgN, Op.getOperand(1),
2097                       MachinePointerInfo(SV));
2098 }
2099 
2100 // Try to lower partial.reduce.add to a dot or fallback to a sequence with
2101 // extmul and adds.
2102 SDValue performLowerPartialReduction(SDNode *N, SelectionDAG &DAG) {
2103   assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN);
2104   if (N->getConstantOperandVal(0) !=
2105       Intrinsic::experimental_vector_partial_reduce_add)
2106     return SDValue();
2107 
2108   assert(N->getValueType(0) == MVT::v4i32 && "can only support v4i32");
2109   SDLoc DL(N);
2110   SDValue Mul = N->getOperand(2);
2111   assert(Mul->getOpcode() == ISD::MUL && "expected mul input");
2112 
2113   SDValue ExtendLHS = Mul->getOperand(0);
2114   SDValue ExtendRHS = Mul->getOperand(1);
2115   assert((ISD::isExtOpcode(ExtendLHS.getOpcode()) &&
2116           ISD::isExtOpcode(ExtendRHS.getOpcode())) &&
2117          "expected widening mul");
2118   assert(ExtendLHS.getOpcode() == ExtendRHS.getOpcode() &&
2119          "expected mul to use the same extend for both operands");
2120 
2121   SDValue ExtendInLHS = ExtendLHS->getOperand(0);
2122   SDValue ExtendInRHS = ExtendRHS->getOperand(0);
2123   bool IsSigned = ExtendLHS->getOpcode() == ISD::SIGN_EXTEND;
2124 
2125   if (ExtendInLHS->getValueType(0) == MVT::v8i16) {
2126     if (IsSigned) {
2127       // i32x4.dot_i16x8_s
2128       SDValue Dot = DAG.getNode(WebAssemblyISD::DOT, DL, MVT::v4i32,
2129                                 ExtendInLHS, ExtendInRHS);
2130       return DAG.getNode(ISD::ADD, DL, MVT::v4i32, N->getOperand(1), Dot);
2131     }
2132 
2133     unsigned LowOpc = WebAssemblyISD::EXTEND_LOW_U;
2134     unsigned HighOpc = WebAssemblyISD::EXTEND_HIGH_U;
2135 
2136     // (add (add (extmul_low_sx lhs, rhs), (extmul_high_sx lhs, rhs)))
2137     SDValue LowLHS = DAG.getNode(LowOpc, DL, MVT::v4i32, ExtendInLHS);
2138     SDValue LowRHS = DAG.getNode(LowOpc, DL, MVT::v4i32, ExtendInRHS);
2139     SDValue HighLHS = DAG.getNode(HighOpc, DL, MVT::v4i32, ExtendInLHS);
2140     SDValue HighRHS = DAG.getNode(HighOpc, DL, MVT::v4i32, ExtendInRHS);
2141 
2142     SDValue MulLow = DAG.getNode(ISD::MUL, DL, MVT::v4i32, LowLHS, LowRHS);
2143     SDValue MulHigh = DAG.getNode(ISD::MUL, DL, MVT::v4i32, HighLHS, HighRHS);
2144     SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::v4i32, MulLow, MulHigh);
2145     return DAG.getNode(ISD::ADD, DL, MVT::v4i32, N->getOperand(1), Add);
2146   } else {
2147     assert(ExtendInLHS->getValueType(0) == MVT::v16i8 &&
2148            "expected v16i8 input types");
2149     // Lower to a wider tree, using twice the operations compared to above.
2150     if (IsSigned) {
2151       // Use two dots
2152       unsigned LowOpc = WebAssemblyISD::EXTEND_LOW_S;
2153       unsigned HighOpc = WebAssemblyISD::EXTEND_HIGH_S;
2154       SDValue LowLHS = DAG.getNode(LowOpc, DL, MVT::v8i16, ExtendInLHS);
2155       SDValue LowRHS = DAG.getNode(LowOpc, DL, MVT::v8i16, ExtendInRHS);
2156       SDValue HighLHS = DAG.getNode(HighOpc, DL, MVT::v8i16, ExtendInLHS);
2157       SDValue HighRHS = DAG.getNode(HighOpc, DL, MVT::v8i16, ExtendInRHS);
2158       SDValue DotLHS =
2159           DAG.getNode(WebAssemblyISD::DOT, DL, MVT::v4i32, LowLHS, LowRHS);
2160       SDValue DotRHS =
2161           DAG.getNode(WebAssemblyISD::DOT, DL, MVT::v4i32, HighLHS, HighRHS);
2162       SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::v4i32, DotLHS, DotRHS);
2163       return DAG.getNode(ISD::ADD, DL, MVT::v4i32, N->getOperand(1), Add);
2164     }
2165 
2166     unsigned LowOpc = WebAssemblyISD::EXTEND_LOW_U;
2167     unsigned HighOpc = WebAssemblyISD::EXTEND_HIGH_U;
2168     SDValue LowLHS = DAG.getNode(LowOpc, DL, MVT::v8i16, ExtendInLHS);
2169     SDValue LowRHS = DAG.getNode(LowOpc, DL, MVT::v8i16, ExtendInRHS);
2170     SDValue HighLHS = DAG.getNode(HighOpc, DL, MVT::v8i16, ExtendInLHS);
2171     SDValue HighRHS = DAG.getNode(HighOpc, DL, MVT::v8i16, ExtendInRHS);
2172 
2173     SDValue MulLow = DAG.getNode(ISD::MUL, DL, MVT::v8i16, LowLHS, LowRHS);
2174     SDValue MulHigh = DAG.getNode(ISD::MUL, DL, MVT::v8i16, HighLHS, HighRHS);
2175 
2176     SDValue LowLow = DAG.getNode(LowOpc, DL, MVT::v4i32, MulLow);
2177     SDValue LowHigh = DAG.getNode(LowOpc, DL, MVT::v4i32, MulHigh);
2178     SDValue HighLow = DAG.getNode(HighOpc, DL, MVT::v4i32, MulLow);
2179     SDValue HighHigh = DAG.getNode(HighOpc, DL, MVT::v4i32, MulHigh);
2180 
2181     SDValue AddLow = DAG.getNode(ISD::ADD, DL, MVT::v4i32, LowLow, HighLow);
2182     SDValue AddHigh = DAG.getNode(ISD::ADD, DL, MVT::v4i32, LowHigh, HighHigh);
2183     SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::v4i32, AddLow, AddHigh);
2184     return DAG.getNode(ISD::ADD, DL, MVT::v4i32, N->getOperand(1), Add);
2185   }
2186 }
2187 
SDValue WebAssemblyTargetLowering::LowerIntrinsic(SDValue Op,
                                                  SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  unsigned IntNo;
  // The position of the intrinsic-ID operand depends on whether the node
  // carries a chain: chained intrinsic nodes have the chain at operand 0.
  switch (Op.getOpcode()) {
  case ISD::INTRINSIC_VOID:
  case ISD::INTRINSIC_W_CHAIN:
    IntNo = Op.getConstantOperandVal(1);
    break;
  case ISD::INTRINSIC_WO_CHAIN:
    IntNo = Op.getConstantOperandVal(0);
    break;
  default:
    llvm_unreachable("Invalid intrinsic");
  }
  SDLoc DL(Op);

  switch (IntNo) {
  default:
    return SDValue(); // Don't custom lower most intrinsics.

  case Intrinsic::wasm_lsda: {
    // Materialize the address of this function's exception table, which is
    // named "GCC_except_table<function number>".
    auto PtrVT = getPointerTy(MF.getDataLayout());
    const char *SymName = MF.createExternalSymbolName(
        "GCC_except_table" + std::to_string(MF.getFunctionNumber()));
    if (isPositionIndependent()) {
      // In PIC mode the address is computed as __memory_base plus the
      // memory-base-relative offset of the symbol.
      SDValue Node = DAG.getTargetExternalSymbol(
          SymName, PtrVT, WebAssemblyII::MO_MEMORY_BASE_REL);
      const char *BaseName = MF.createExternalSymbolName("__memory_base");
      SDValue BaseAddr =
          DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
                      DAG.getTargetExternalSymbol(BaseName, PtrVT));
      SDValue SymAddr =
          DAG.getNode(WebAssemblyISD::WrapperREL, DL, PtrVT, Node);
      return DAG.getNode(ISD::ADD, DL, PtrVT, BaseAddr, SymAddr);
    }
    // Non-PIC: the symbol's absolute address can be used directly.
    SDValue Node = DAG.getTargetExternalSymbol(SymName, PtrVT);
    return DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT, Node);
  }

  case Intrinsic::wasm_shuffle: {
    // Drop in-chain and replace undefs, but otherwise pass through unchanged
    SDValue Ops[18];
    size_t OpIdx = 0;
    // Ops = two vector inputs followed by sixteen i32 lane indices.
    Ops[OpIdx++] = Op.getOperand(1);
    Ops[OpIdx++] = Op.getOperand(2);
    while (OpIdx < 18) {
      const SDValue &MaskIdx = Op.getOperand(OpIdx + 1);
      // Normalize undef or out-of-range (>= 32) mask entries to constant 0,
      // preserving target-constant-ness of the original operand.
      if (MaskIdx.isUndef() || MaskIdx.getNode()->getAsZExtVal() >= 32) {
        bool isTarget = MaskIdx.getNode()->getOpcode() == ISD::TargetConstant;
        Ops[OpIdx++] = DAG.getConstant(0, DL, MVT::i32, isTarget);
      } else {
        Ops[OpIdx++] = MaskIdx;
      }
    }
    return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops);
  }

  case Intrinsic::thread_pointer: {
    // The thread pointer is the value of the __tls_base global, read with a
    // width-appropriate global.get.
    MVT PtrVT = getPointerTy(DAG.getDataLayout());
    auto GlobalGet = PtrVT == MVT::i64 ? WebAssembly::GLOBAL_GET_I64
                                       : WebAssembly::GLOBAL_GET_I32;
    const char *TlsBase = MF.createExternalSymbolName("__tls_base");
    return SDValue(
        DAG.getMachineNode(GlobalGet, DL, PtrVT,
                           DAG.getTargetExternalSymbol(TlsBase, PtrVT)),
        0);
  }
  }
}
2258 
SDValue
WebAssemblyTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
                                                  SelectionDAG &DAG) const {
  SDLoc DL(Op);
  // If sign extension operations are disabled, allow sext_inreg only if operand
  // is a vector extract of an i8 or i16 lane. SIMD does not depend on sign
  // extension operations, but allowing sext_inreg in this context lets us have
  // simple patterns to select extract_lane_s instructions. Expanding sext_inreg
  // everywhere would be simpler in this file, but would necessitate large and
  // brittle patterns to undo the expansion and select extract_lane_s
  // instructions.
  assert(!Subtarget->hasSignExt() && Subtarget->hasSIMD128());
  if (Op.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT)
    return SDValue();

  const SDValue &Extract = Op.getOperand(0);
  MVT VecT = Extract.getOperand(0).getSimpleValueType();
  // Only handle sources whose lanes are at most 32 bits wide.
  if (VecT.getVectorElementType().getSizeInBits() > 32)
    return SDValue();
  // The lane width being sign-extended from, taken from the VT operand of
  // the sext_inreg node.
  MVT ExtractedLaneT =
      cast<VTSDNode>(Op.getOperand(1).getNode())->getVT().getSimpleVT();
  // The 128-bit vector type whose lane type is exactly that width.
  MVT ExtractedVecT =
      MVT::getVectorVT(ExtractedLaneT, 128 / ExtractedLaneT.getSizeInBits());
  // If the source vector already has that lane type, the existing ISel
  // patterns cover it directly.
  if (ExtractedVecT == VecT)
    return Op;

  // Bitcast vector to appropriate type to ensure ISel pattern coverage
  const SDNode *Index = Extract.getOperand(1).getNode();
  if (!isa<ConstantSDNode>(Index))
    return SDValue();
  unsigned IndexVal = Index->getAsZExtVal();
  // Rescale the lane index in terms of the narrower lanes of the bitcast
  // vector type.
  unsigned Scale =
      ExtractedVecT.getVectorNumElements() / VecT.getVectorNumElements();
  assert(Scale > 1);
  SDValue NewIndex =
      DAG.getConstant(IndexVal * Scale, DL, Index->getValueType(0));
  SDValue NewExtract = DAG.getNode(
      ISD::EXTRACT_VECTOR_ELT, DL, Extract.getValueType(),
      DAG.getBitcast(ExtractedVecT, Extract.getOperand(0)), NewIndex);
  // Re-wrap in sext_inreg so the combined pattern selects extract_lane_s.
  return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Op.getValueType(), NewExtract,
                     Op.getOperand(1));
}
2301 
2302 static SDValue GetExtendHigh(SDValue Op, unsigned UserOpc, EVT VT,
2303                              SelectionDAG &DAG) {
2304   if (Op.getOpcode() != ISD::VECTOR_SHUFFLE)
2305     return SDValue();
2306 
2307   assert((UserOpc == WebAssemblyISD::EXTEND_LOW_U ||
2308           UserOpc == WebAssemblyISD::EXTEND_LOW_S) &&
2309          "expected extend_low");
2310   auto *Shuffle = cast<ShuffleVectorSDNode>(Op.getNode());
2311 
2312   ArrayRef<int> Mask = Shuffle->getMask();
2313   // Look for a shuffle which moves from the high half to the low half.
2314   size_t FirstIdx = Mask.size() / 2;
2315   for (size_t i = 0; i < Mask.size() / 2; ++i) {
2316     if (Mask[i] != static_cast<int>(FirstIdx + i)) {
2317       return SDValue();
2318     }
2319   }
2320 
2321   SDLoc DL(Op);
2322   unsigned Opc = UserOpc == WebAssemblyISD::EXTEND_LOW_S
2323                      ? WebAssemblyISD::EXTEND_HIGH_S
2324                      : WebAssemblyISD::EXTEND_HIGH_U;
2325   return DAG.getNode(Opc, DL, VT, Shuffle->getOperand(0));
2326 }
2327 
2328 SDValue
2329 WebAssemblyTargetLowering::LowerEXTEND_VECTOR_INREG(SDValue Op,
2330                                                     SelectionDAG &DAG) const {
2331   SDLoc DL(Op);
2332   EVT VT = Op.getValueType();
2333   SDValue Src = Op.getOperand(0);
2334   EVT SrcVT = Src.getValueType();
2335 
2336   if (SrcVT.getVectorElementType() == MVT::i1 ||
2337       SrcVT.getVectorElementType() == MVT::i64)
2338     return SDValue();
2339 
2340   assert(VT.getScalarSizeInBits() % SrcVT.getScalarSizeInBits() == 0 &&
2341          "Unexpected extension factor.");
2342   unsigned Scale = VT.getScalarSizeInBits() / SrcVT.getScalarSizeInBits();
2343 
2344   if (Scale != 2 && Scale != 4 && Scale != 8)
2345     return SDValue();
2346 
2347   unsigned Ext;
2348   switch (Op.getOpcode()) {
2349   case ISD::ZERO_EXTEND_VECTOR_INREG:
2350     Ext = WebAssemblyISD::EXTEND_LOW_U;
2351     break;
2352   case ISD::SIGN_EXTEND_VECTOR_INREG:
2353     Ext = WebAssemblyISD::EXTEND_LOW_S;
2354     break;
2355   }
2356 
2357   if (Scale == 2) {
2358     // See if we can use EXTEND_HIGH.
2359     if (auto ExtendHigh = GetExtendHigh(Op.getOperand(0), Ext, VT, DAG))
2360       return ExtendHigh;
2361   }
2362 
2363   SDValue Ret = Src;
2364   while (Scale != 1) {
2365     Ret = DAG.getNode(Ext, DL,
2366                       Ret.getValueType()
2367                           .widenIntegerVectorElementType(*DAG.getContext())
2368                           .getHalfNumVectorElementsVT(*DAG.getContext()),
2369                       Ret);
2370     Scale /= 2;
2371   }
2372   assert(Ret.getValueType() == VT);
2373   return Ret;
2374 }
2375 
2376 static SDValue LowerConvertLow(SDValue Op, SelectionDAG &DAG) {
2377   SDLoc DL(Op);
2378   if (Op.getValueType() != MVT::v2f64)
2379     return SDValue();
2380 
2381   auto GetConvertedLane = [](SDValue Op, unsigned &Opcode, SDValue &SrcVec,
2382                              unsigned &Index) -> bool {
2383     switch (Op.getOpcode()) {
2384     case ISD::SINT_TO_FP:
2385       Opcode = WebAssemblyISD::CONVERT_LOW_S;
2386       break;
2387     case ISD::UINT_TO_FP:
2388       Opcode = WebAssemblyISD::CONVERT_LOW_U;
2389       break;
2390     case ISD::FP_EXTEND:
2391       Opcode = WebAssemblyISD::PROMOTE_LOW;
2392       break;
2393     default:
2394       return false;
2395     }
2396 
2397     auto ExtractVector = Op.getOperand(0);
2398     if (ExtractVector.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2399       return false;
2400 
2401     if (!isa<ConstantSDNode>(ExtractVector.getOperand(1).getNode()))
2402       return false;
2403 
2404     SrcVec = ExtractVector.getOperand(0);
2405     Index = ExtractVector.getConstantOperandVal(1);
2406     return true;
2407   };
2408 
2409   unsigned LHSOpcode, RHSOpcode, LHSIndex, RHSIndex;
2410   SDValue LHSSrcVec, RHSSrcVec;
2411   if (!GetConvertedLane(Op.getOperand(0), LHSOpcode, LHSSrcVec, LHSIndex) ||
2412       !GetConvertedLane(Op.getOperand(1), RHSOpcode, RHSSrcVec, RHSIndex))
2413     return SDValue();
2414 
2415   if (LHSOpcode != RHSOpcode)
2416     return SDValue();
2417 
2418   MVT ExpectedSrcVT;
2419   switch (LHSOpcode) {
2420   case WebAssemblyISD::CONVERT_LOW_S:
2421   case WebAssemblyISD::CONVERT_LOW_U:
2422     ExpectedSrcVT = MVT::v4i32;
2423     break;
2424   case WebAssemblyISD::PROMOTE_LOW:
2425     ExpectedSrcVT = MVT::v4f32;
2426     break;
2427   }
2428   if (LHSSrcVec.getValueType() != ExpectedSrcVT)
2429     return SDValue();
2430 
2431   auto Src = LHSSrcVec;
2432   if (LHSIndex != 0 || RHSIndex != 1 || LHSSrcVec != RHSSrcVec) {
2433     // Shuffle the source vector so that the converted lanes are the low lanes.
2434     Src = DAG.getVectorShuffle(
2435         ExpectedSrcVT, DL, LHSSrcVec, RHSSrcVec,
2436         {static_cast<int>(LHSIndex), static_cast<int>(RHSIndex) + 4, -1, -1});
2437   }
2438   return DAG.getNode(LHSOpcode, DL, MVT::v2f64, Src);
2439 }
2440 
// Lower a BUILD_VECTOR by picking the vector-creation instruction (swizzle,
// shuffle, constant vector, or splat) that initializes the most lanes at
// once, then patching any remaining lanes with replace_lane.
SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
                                                     SelectionDAG &DAG) const {
  MVT VT = Op.getSimpleValueType();
  if (VT == MVT::v8f16) {
    // BUILD_VECTOR can't handle FP16 operands since Wasm doesn't have a scalar
    // FP16 type, so cast them to I16s.
    MVT IVT = VT.changeVectorElementType(MVT::i16);
    SmallVector<SDValue, 8> NewOps;
    for (unsigned I = 0, E = Op.getNumOperands(); I < E; ++I)
      NewOps.push_back(DAG.getBitcast(MVT::i16, Op.getOperand(I)));
    SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(), IVT, NewOps);
    return DAG.getBitcast(VT, Res);
  }

  // A build_vector of converted lanes may become a single convert_low or
  // promote_low instruction.
  if (auto ConvertLow = LowerConvertLow(Op, DAG))
    return ConvertLow;

  SDLoc DL(Op);
  const EVT VecT = Op.getValueType();
  const EVT LaneT = Op.getOperand(0).getValueType();
  const size_t Lanes = Op.getNumOperands();
  // Swizzle operates only on i8x16 vectors.
  bool CanSwizzle = VecT == MVT::v16i8;

  // BUILD_VECTORs are lowered to the instruction that initializes the highest
  // possible number of lanes at once followed by a sequence of replace_lane
  // instructions to individually initialize any remaining lanes.

  // TODO: Tune this. For example, lanewise swizzling is very expensive, so
  // swizzled lanes should be given greater weight.

  // TODO: Investigate looping rather than always extracting/replacing specific
  // lanes to fill gaps.

  auto IsConstant = [](const SDValue &V) {
    return V.getOpcode() == ISD::Constant || V.getOpcode() == ISD::ConstantFP;
  };

  // Returns the source vector and index vector pair if they exist. Checks for:
  //   (extract_vector_elt
  //     $src,
  //     (sign_extend_inreg (extract_vector_elt $indices, $i))
  //   )
  auto GetSwizzleSrcs = [](size_t I, const SDValue &Lane) {
    auto Bail = std::make_pair(SDValue(), SDValue());
    if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
      return Bail;
    const SDValue &SwizzleSrc = Lane->getOperand(0);
    const SDValue &IndexExt = Lane->getOperand(1);
    if (IndexExt->getOpcode() != ISD::SIGN_EXTEND_INREG)
      return Bail;
    const SDValue &Index = IndexExt->getOperand(0);
    if (Index->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
      return Bail;
    const SDValue &SwizzleIndices = Index->getOperand(0);
    // Lane I must be selected by index lane I of an i8x16 index vector.
    if (SwizzleSrc.getValueType() != MVT::v16i8 ||
        SwizzleIndices.getValueType() != MVT::v16i8 ||
        Index->getOperand(1)->getOpcode() != ISD::Constant ||
        Index->getConstantOperandVal(1) != I)
      return Bail;
    return std::make_pair(SwizzleSrc, SwizzleIndices);
  };

  // If the lane is extracted from another vector at a constant index, return
  // that vector. The source vector must not have more lanes than the dest
  // because the shufflevector indices are in terms of the destination lanes and
  // would not be able to address the smaller individual source lanes.
  auto GetShuffleSrc = [&](const SDValue &Lane) {
    if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
      return SDValue();
    if (!isa<ConstantSDNode>(Lane->getOperand(1).getNode()))
      return SDValue();
    if (Lane->getOperand(0).getValueType().getVectorNumElements() >
        VecT.getVectorNumElements())
      return SDValue();
    return Lane->getOperand(0);
  };

  // Tally how many lanes each candidate source could initialize.
  using ValueEntry = std::pair<SDValue, size_t>;
  SmallVector<ValueEntry, 16> SplatValueCounts;

  using SwizzleEntry = std::pair<std::pair<SDValue, SDValue>, size_t>;
  SmallVector<SwizzleEntry, 16> SwizzleCounts;

  using ShuffleEntry = std::pair<SDValue, size_t>;
  SmallVector<ShuffleEntry, 16> ShuffleCounts;

  auto AddCount = [](auto &Counts, const auto &Val) {
    auto CountIt =
        llvm::find_if(Counts, [&Val](auto E) { return E.first == Val; });
    if (CountIt == Counts.end()) {
      Counts.emplace_back(Val, 1);
    } else {
      CountIt->second++;
    }
  };

  auto GetMostCommon = [](auto &Counts) {
    auto CommonIt = llvm::max_element(Counts, llvm::less_second());
    assert(CommonIt != Counts.end() && "Unexpected all-undef build_vector");
    return *CommonIt;
  };

  size_t NumConstantLanes = 0;

  // Count eligible lanes for each type of vector creation op
  for (size_t I = 0; I < Lanes; ++I) {
    const SDValue &Lane = Op->getOperand(I);
    if (Lane.isUndef())
      continue;

    AddCount(SplatValueCounts, Lane);

    if (IsConstant(Lane))
      NumConstantLanes++;
    if (auto ShuffleSrc = GetShuffleSrc(Lane))
      AddCount(ShuffleCounts, ShuffleSrc);
    if (CanSwizzle) {
      auto SwizzleSrcs = GetSwizzleSrcs(I, Lane);
      if (SwizzleSrcs.first)
        AddCount(SwizzleCounts, SwizzleSrcs);
    }
  }

  SDValue SplatValue;
  size_t NumSplatLanes;
  std::tie(SplatValue, NumSplatLanes) = GetMostCommon(SplatValueCounts);

  SDValue SwizzleSrc;
  SDValue SwizzleIndices;
  size_t NumSwizzleLanes = 0;
  if (SwizzleCounts.size())
    std::forward_as_tuple(std::tie(SwizzleSrc, SwizzleIndices),
                          NumSwizzleLanes) = GetMostCommon(SwizzleCounts);

  // Shuffles can draw from up to two vectors, so find the two most common
  // sources.
  SDValue ShuffleSrc1, ShuffleSrc2;
  size_t NumShuffleLanes = 0;
  if (ShuffleCounts.size()) {
    std::tie(ShuffleSrc1, NumShuffleLanes) = GetMostCommon(ShuffleCounts);
    llvm::erase_if(ShuffleCounts,
                   [&](const auto &Pair) { return Pair.first == ShuffleSrc1; });
  }
  if (ShuffleCounts.size()) {
    size_t AdditionalShuffleLanes;
    std::tie(ShuffleSrc2, AdditionalShuffleLanes) =
        GetMostCommon(ShuffleCounts);
    NumShuffleLanes += AdditionalShuffleLanes;
  }

  // Predicate returning true if the lane is properly initialized by the
  // original instruction
  std::function<bool(size_t, const SDValue &)> IsLaneConstructed;
  SDValue Result;
  // Prefer swizzles over shuffles over vector consts over splats
  if (NumSwizzleLanes >= NumShuffleLanes &&
      NumSwizzleLanes >= NumConstantLanes && NumSwizzleLanes >= NumSplatLanes) {
    Result = DAG.getNode(WebAssemblyISD::SWIZZLE, DL, VecT, SwizzleSrc,
                         SwizzleIndices);
    auto Swizzled = std::make_pair(SwizzleSrc, SwizzleIndices);
    IsLaneConstructed = [&, Swizzled](size_t I, const SDValue &Lane) {
      return Swizzled == GetSwizzleSrcs(I, Lane);
    };
  } else if (NumShuffleLanes >= NumConstantLanes &&
             NumShuffleLanes >= NumSplatLanes) {
    size_t DestLaneSize = VecT.getVectorElementType().getFixedSizeInBits() / 8;
    size_t DestLaneCount = VecT.getVectorNumElements();
    size_t Scale1 = 1;
    size_t Scale2 = 1;
    SDValue Src1 = ShuffleSrc1;
    SDValue Src2 = ShuffleSrc2 ? ShuffleSrc2 : DAG.getUNDEF(VecT);
    // Sources with wider lanes than the destination are bitcast to the
    // destination type; Scale1/Scale2 convert their lane indices to
    // destination lane units.
    if (Src1.getValueType() != VecT) {
      size_t LaneSize =
          Src1.getValueType().getVectorElementType().getFixedSizeInBits() / 8;
      assert(LaneSize > DestLaneSize);
      Scale1 = LaneSize / DestLaneSize;
      Src1 = DAG.getBitcast(VecT, Src1);
    }
    if (Src2.getValueType() != VecT) {
      size_t LaneSize =
          Src2.getValueType().getVectorElementType().getFixedSizeInBits() / 8;
      assert(LaneSize > DestLaneSize);
      Scale2 = LaneSize / DestLaneSize;
      Src2 = DAG.getBitcast(VecT, Src2);
    }

    // Build the shuffle mask; lanes not drawn from either source become
    // undef (-1) and are patched with replace_lane below.
    int Mask[16];
    assert(DestLaneCount <= 16);
    for (size_t I = 0; I < DestLaneCount; ++I) {
      const SDValue &Lane = Op->getOperand(I);
      SDValue Src = GetShuffleSrc(Lane);
      if (Src == ShuffleSrc1) {
        Mask[I] = Lane->getConstantOperandVal(1) * Scale1;
      } else if (Src && Src == ShuffleSrc2) {
        Mask[I] = DestLaneCount + Lane->getConstantOperandVal(1) * Scale2;
      } else {
        Mask[I] = -1;
      }
    }
    ArrayRef<int> MaskRef(Mask, DestLaneCount);
    Result = DAG.getVectorShuffle(VecT, DL, Src1, Src2, MaskRef);
    IsLaneConstructed = [&](size_t, const SDValue &Lane) {
      auto Src = GetShuffleSrc(Lane);
      return Src == ShuffleSrc1 || (Src && Src == ShuffleSrc2);
    };
  } else if (NumConstantLanes >= NumSplatLanes) {
    SmallVector<SDValue, 16> ConstLanes;
    for (const SDValue &Lane : Op->op_values()) {
      if (IsConstant(Lane)) {
        // Values may need to be fixed so that they will sign extend to be
        // within the expected range during ISel. Check whether the value is in
        // bounds based on the lane bit width and if it is out of bounds, lop
        // off the extra bits and subtract 2^n to reflect giving the high bit
        // value -2^(n-1) rather than +2^(n-1). Skip the i64 case because it
        // cannot possibly be out of range.
        auto *Const = dyn_cast<ConstantSDNode>(Lane.getNode());
        int64_t Val = Const ? Const->getSExtValue() : 0;
        uint64_t LaneBits = 128 / Lanes;
        assert((LaneBits == 64 || Val >= -(1ll << (LaneBits - 1))) &&
               "Unexpected out of bounds negative value");
        if (Const && LaneBits != 64 && Val > (1ll << (LaneBits - 1)) - 1) {
          uint64_t Mask = (1ll << LaneBits) - 1;
          auto NewVal = (((uint64_t)Val & Mask) - (1ll << LaneBits)) & Mask;
          ConstLanes.push_back(DAG.getConstant(NewVal, SDLoc(Lane), LaneT));
        } else {
          ConstLanes.push_back(Lane);
        }
      } else if (LaneT.isFloatingPoint()) {
        // Non-constant lanes get placeholder zeros and are patched below.
        ConstLanes.push_back(DAG.getConstantFP(0, DL, LaneT));
      } else {
        ConstLanes.push_back(DAG.getConstant(0, DL, LaneT));
      }
    }
    Result = DAG.getBuildVector(VecT, DL, ConstLanes);
    IsLaneConstructed = [&IsConstant](size_t _, const SDValue &Lane) {
      return IsConstant(Lane);
    };
  } else {
    size_t DestLaneSize = VecT.getVectorElementType().getFixedSizeInBits();
    if (NumSplatLanes == 1 && Op->getOperand(0) == SplatValue &&
        (DestLaneSize == 32 || DestLaneSize == 64)) {
      // Could be selected to load_zero.
      Result = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecT, SplatValue);
    } else {
      // Use a splat (which might be selected as a load splat)
      Result = DAG.getSplatBuildVector(VecT, DL, SplatValue);
    }
    IsLaneConstructed = [&SplatValue](size_t _, const SDValue &Lane) {
      return Lane == SplatValue;
    };
  }

  assert(Result);
  assert(IsLaneConstructed);

  // Add replace_lane instructions for any unhandled values
  for (size_t I = 0; I < Lanes; ++I) {
    const SDValue &Lane = Op->getOperand(I);
    if (!Lane.isUndef() && !IsLaneConstructed(I, Lane))
      Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecT, Result, Lane,
                           DAG.getConstant(I, DL, MVT::i32));
  }

  return Result;
}
2706 
2707 SDValue
2708 WebAssemblyTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
2709                                                SelectionDAG &DAG) const {
2710   SDLoc DL(Op);
2711   ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op.getNode())->getMask();
2712   MVT VecType = Op.getOperand(0).getSimpleValueType();
2713   assert(VecType.is128BitVector() && "Unexpected shuffle vector type");
2714   size_t LaneBytes = VecType.getVectorElementType().getSizeInBits() / 8;
2715 
2716   // Space for two vector args and sixteen mask indices
2717   SDValue Ops[18];
2718   size_t OpIdx = 0;
2719   Ops[OpIdx++] = Op.getOperand(0);
2720   Ops[OpIdx++] = Op.getOperand(1);
2721 
2722   // Expand mask indices to byte indices and materialize them as operands
2723   for (int M : Mask) {
2724     for (size_t J = 0; J < LaneBytes; ++J) {
2725       // Lower undefs (represented by -1 in mask) to {0..J}, which use a
2726       // whole lane of vector input, to allow further reduction at VM. E.g.
2727       // match an 8x16 byte shuffle to an equivalent cheaper 32x4 shuffle.
2728       uint64_t ByteIndex = M == -1 ? J : (uint64_t)M * LaneBytes + J;
2729       Ops[OpIdx++] = DAG.getConstant(ByteIndex, DL, MVT::i32);
2730     }
2731   }
2732 
2733   return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops);
2734 }
2735 
2736 SDValue WebAssemblyTargetLowering::LowerSETCC(SDValue Op,
2737                                               SelectionDAG &DAG) const {
2738   SDLoc DL(Op);
2739   // The legalizer does not know how to expand the unsupported comparison modes
2740   // of i64x2 vectors, so we manually unroll them here.
2741   assert(Op->getOperand(0)->getSimpleValueType(0) == MVT::v2i64);
2742   SmallVector<SDValue, 2> LHS, RHS;
2743   DAG.ExtractVectorElements(Op->getOperand(0), LHS);
2744   DAG.ExtractVectorElements(Op->getOperand(1), RHS);
2745   const SDValue &CC = Op->getOperand(2);
2746   auto MakeLane = [&](unsigned I) {
2747     return DAG.getNode(ISD::SELECT_CC, DL, MVT::i64, LHS[I], RHS[I],
2748                        DAG.getConstant(uint64_t(-1), DL, MVT::i64),
2749                        DAG.getConstant(uint64_t(0), DL, MVT::i64), CC);
2750   };
2751   return DAG.getBuildVector(Op->getValueType(0), DL,
2752                             {MakeLane(0), MakeLane(1)});
2753 }
2754 
2755 SDValue
2756 WebAssemblyTargetLowering::LowerAccessVectorElement(SDValue Op,
2757                                                     SelectionDAG &DAG) const {
2758   // Allow constant lane indices, expand variable lane indices
2759   SDNode *IdxNode = Op.getOperand(Op.getNumOperands() - 1).getNode();
2760   if (isa<ConstantSDNode>(IdxNode)) {
2761     // Ensure the index type is i32 to match the tablegen patterns
2762     uint64_t Idx = IdxNode->getAsZExtVal();
2763     SmallVector<SDValue, 3> Ops(Op.getNode()->ops());
2764     Ops[Op.getNumOperands() - 1] =
2765         DAG.getConstant(Idx, SDLoc(IdxNode), MVT::i32);
2766     return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), Ops);
2767   }
2768   // Perform default expansion
2769   return SDValue();
2770 }
2771 
2772 static SDValue unrollVectorShift(SDValue Op, SelectionDAG &DAG) {
2773   EVT LaneT = Op.getSimpleValueType().getVectorElementType();
2774   // 32-bit and 64-bit unrolled shifts will have proper semantics
2775   if (LaneT.bitsGE(MVT::i32))
2776     return DAG.UnrollVectorOp(Op.getNode());
2777   // Otherwise mask the shift value to get proper semantics from 32-bit shift
2778   SDLoc DL(Op);
2779   size_t NumLanes = Op.getSimpleValueType().getVectorNumElements();
2780   SDValue Mask = DAG.getConstant(LaneT.getSizeInBits() - 1, DL, MVT::i32);
2781   unsigned ShiftOpcode = Op.getOpcode();
2782   SmallVector<SDValue, 16> ShiftedElements;
2783   DAG.ExtractVectorElements(Op.getOperand(0), ShiftedElements, 0, 0, MVT::i32);
2784   SmallVector<SDValue, 16> ShiftElements;
2785   DAG.ExtractVectorElements(Op.getOperand(1), ShiftElements, 0, 0, MVT::i32);
2786   SmallVector<SDValue, 16> UnrolledOps;
2787   for (size_t i = 0; i < NumLanes; ++i) {
2788     SDValue MaskedShiftValue =
2789         DAG.getNode(ISD::AND, DL, MVT::i32, ShiftElements[i], Mask);
2790     SDValue ShiftedValue = ShiftedElements[i];
2791     if (ShiftOpcode == ISD::SRA)
2792       ShiftedValue = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32,
2793                                  ShiftedValue, DAG.getValueType(LaneT));
2794     UnrolledOps.push_back(
2795         DAG.getNode(ShiftOpcode, DL, MVT::i32, ShiftedValue, MaskedShiftValue));
2796   }
2797   return DAG.getBuildVector(Op.getValueType(), DL, UnrolledOps);
2798 }
2799 
SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Op);

  // Only manually lower vector shifts
  assert(Op.getSimpleValueType().isVector());

  uint64_t LaneBits = Op.getValueType().getScalarSizeInBits();
  auto ShiftVal = Op.getOperand(1);

  // Try to skip bitmask operation since it is implied inside shift instruction
  auto SkipImpliedMask = [](SDValue MaskOp, uint64_t MaskBits) {
    if (MaskOp.getOpcode() != ISD::AND)
      return MaskOp;
    SDValue LHS = MaskOp.getOperand(0);
    SDValue RHS = MaskOp.getOperand(1);
    if (MaskOp.getValueType().isVector()) {
      APInt MaskVal;
      // Normalize so the constant splat operand (if any) ends up in RHS.
      if (!ISD::isConstantSplatVector(RHS.getNode(), MaskVal))
        std::swap(LHS, RHS);

      // (and x, splat(LaneBits - 1)) is redundant; use x directly.
      if (ISD::isConstantSplatVector(RHS.getNode(), MaskVal) &&
          MaskVal == MaskBits)
        MaskOp = LHS;
    } else {
      // Normalize so the constant operand (if any) ends up in RHS.
      if (!isa<ConstantSDNode>(RHS.getNode()))
        std::swap(LHS, RHS);

      // (and x, LaneBits - 1) is redundant; use x directly.
      auto ConstantRHS = dyn_cast<ConstantSDNode>(RHS.getNode());
      if (ConstantRHS && ConstantRHS->getAPIntValue() == MaskBits)
        MaskOp = LHS;
    }

    return MaskOp;
  };

  // Skip vector and operation
  ShiftVal = SkipImpliedMask(ShiftVal, LaneBits - 1);
  // The target shift nodes take a single scalar shift amount, so a
  // non-splat amount has to be unrolled into scalar shifts.
  ShiftVal = DAG.getSplatValue(ShiftVal);
  if (!ShiftVal)
    return unrollVectorShift(Op, DAG);

  // Skip scalar and operation
  ShiftVal = SkipImpliedMask(ShiftVal, LaneBits - 1);
  // Use anyext because none of the high bits can affect the shift
  ShiftVal = DAG.getAnyExtOrTrunc(ShiftVal, DL, MVT::i32);

  // Map the generic shift opcode to the corresponding target node.
  unsigned Opcode;
  switch (Op.getOpcode()) {
  case ISD::SHL:
    Opcode = WebAssemblyISD::VEC_SHL;
    break;
  case ISD::SRA:
    Opcode = WebAssemblyISD::VEC_SHR_S;
    break;
  case ISD::SRL:
    Opcode = WebAssemblyISD::VEC_SHR_U;
    break;
  default:
    llvm_unreachable("unexpected opcode");
  }

  return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(0), ShiftVal);
}
2864 
2865 SDValue WebAssemblyTargetLowering::LowerFP_TO_INT_SAT(SDValue Op,
2866                                                       SelectionDAG &DAG) const {
2867   EVT ResT = Op.getValueType();
2868   EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2869 
2870   if ((ResT == MVT::i32 || ResT == MVT::i64) &&
2871       (SatVT == MVT::i32 || SatVT == MVT::i64))
2872     return Op;
2873 
2874   if (ResT == MVT::v4i32 && SatVT == MVT::i32)
2875     return Op;
2876 
2877   if (ResT == MVT::v8i16 && SatVT == MVT::i16)
2878     return Op;
2879 
2880   return SDValue();
2881 }
2882 
2883 //===----------------------------------------------------------------------===//
2884 //   Custom DAG combine hooks
2885 //===----------------------------------------------------------------------===//
2886 static SDValue
2887 performVECTOR_SHUFFLECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
2888   auto &DAG = DCI.DAG;
2889   auto Shuffle = cast<ShuffleVectorSDNode>(N);
2890 
2891   // Hoist vector bitcasts that don't change the number of lanes out of unary
2892   // shuffles, where they are less likely to get in the way of other combines.
2893   // (shuffle (vNxT1 (bitcast (vNxT0 x))), undef, mask) ->
2894   //  (vNxT1 (bitcast (vNxT0 (shuffle x, undef, mask))))
2895   SDValue Bitcast = N->getOperand(0);
2896   if (Bitcast.getOpcode() != ISD::BITCAST)
2897     return SDValue();
2898   if (!N->getOperand(1).isUndef())
2899     return SDValue();
2900   SDValue CastOp = Bitcast.getOperand(0);
2901   EVT SrcType = CastOp.getValueType();
2902   EVT DstType = Bitcast.getValueType();
2903   if (!SrcType.is128BitVector() ||
2904       SrcType.getVectorNumElements() != DstType.getVectorNumElements())
2905     return SDValue();
2906   SDValue NewShuffle = DAG.getVectorShuffle(
2907       SrcType, SDLoc(N), CastOp, DAG.getUNDEF(SrcType), Shuffle->getMask());
2908   return DAG.getBitcast(DstType, NewShuffle);
2909 }
2910 
2911 /// Convert ({u,s}itofp vec) --> ({u,s}itofp ({s,z}ext vec)) so it doesn't get
2912 /// split up into scalar instructions during legalization, and the vector
2913 /// extending instructions are selected in performVectorExtendCombine below.
2914 static SDValue
2915 performVectorExtendToFPCombine(SDNode *N,
2916                                TargetLowering::DAGCombinerInfo &DCI) {
2917   auto &DAG = DCI.DAG;
2918   assert(N->getOpcode() == ISD::UINT_TO_FP ||
2919          N->getOpcode() == ISD::SINT_TO_FP);
2920 
2921   EVT InVT = N->getOperand(0)->getValueType(0);
2922   EVT ResVT = N->getValueType(0);
2923   MVT ExtVT;
2924   if (ResVT == MVT::v4f32 && (InVT == MVT::v4i16 || InVT == MVT::v4i8))
2925     ExtVT = MVT::v4i32;
2926   else if (ResVT == MVT::v2f64 && (InVT == MVT::v2i16 || InVT == MVT::v2i8))
2927     ExtVT = MVT::v2i32;
2928   else
2929     return SDValue();
2930 
2931   unsigned Op =
2932       N->getOpcode() == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
2933   SDValue Conv = DAG.getNode(Op, SDLoc(N), ExtVT, N->getOperand(0));
2934   return DAG.getNode(N->getOpcode(), SDLoc(N), ResVT, Conv);
2935 }
2936 
2937 static SDValue
2938 performVectorExtendCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
2939   auto &DAG = DCI.DAG;
2940   assert(N->getOpcode() == ISD::SIGN_EXTEND ||
2941          N->getOpcode() == ISD::ZERO_EXTEND);
2942 
2943   // Combine ({s,z}ext (extract_subvector src, i)) into a widening operation if
2944   // possible before the extract_subvector can be expanded.
2945   auto Extract = N->getOperand(0);
2946   if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR)
2947     return SDValue();
2948   auto Source = Extract.getOperand(0);
2949   auto *IndexNode = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
2950   if (IndexNode == nullptr)
2951     return SDValue();
2952   auto Index = IndexNode->getZExtValue();
2953 
2954   // Only v8i8, v4i16, and v2i32 extracts can be widened, and only if the
2955   // extracted subvector is the low or high half of its source.
2956   EVT ResVT = N->getValueType(0);
2957   if (ResVT == MVT::v8i16) {
2958     if (Extract.getValueType() != MVT::v8i8 ||
2959         Source.getValueType() != MVT::v16i8 || (Index != 0 && Index != 8))
2960       return SDValue();
2961   } else if (ResVT == MVT::v4i32) {
2962     if (Extract.getValueType() != MVT::v4i16 ||
2963         Source.getValueType() != MVT::v8i16 || (Index != 0 && Index != 4))
2964       return SDValue();
2965   } else if (ResVT == MVT::v2i64) {
2966     if (Extract.getValueType() != MVT::v2i32 ||
2967         Source.getValueType() != MVT::v4i32 || (Index != 0 && Index != 2))
2968       return SDValue();
2969   } else {
2970     return SDValue();
2971   }
2972 
2973   bool IsSext = N->getOpcode() == ISD::SIGN_EXTEND;
2974   bool IsLow = Index == 0;
2975 
2976   unsigned Op = IsSext ? (IsLow ? WebAssemblyISD::EXTEND_LOW_S
2977                                 : WebAssemblyISD::EXTEND_HIGH_S)
2978                        : (IsLow ? WebAssemblyISD::EXTEND_LOW_U
2979                                 : WebAssemblyISD::EXTEND_HIGH_U);
2980 
2981   return DAG.getNode(Op, SDLoc(N), ResVT, Source);
2982 }
2983 
// Combine a narrowing f64x2 conversion (saturating float-to-int or fp_round)
// whose high lanes are known to be zero into one of the WebAssembly "_zero"
// conversion nodes. Two shapes are handled: a concat of the conversion with a
// zero splat, and a conversion of a concat with a zero splat.
static SDValue
performVectorTruncZeroCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
  auto &DAG = DCI.DAG;

  // Map the generic conversion opcode to the matching WebAssembly node.
  auto GetWasmConversionOp = [](unsigned Op) {
    switch (Op) {
    case ISD::FP_TO_SINT_SAT:
      return WebAssemblyISD::TRUNC_SAT_ZERO_S;
    case ISD::FP_TO_UINT_SAT:
      return WebAssemblyISD::TRUNC_SAT_ZERO_U;
    case ISD::FP_ROUND:
      return WebAssemblyISD::DEMOTE_ZERO;
    }
    llvm_unreachable("unexpected op");
  };

  // True iff SplatVal is a BUILD_VECTOR that is a constant splat of zero.
  auto IsZeroSplat = [](SDValue SplatVal) {
    auto *Splat = dyn_cast<BuildVectorSDNode>(SplatVal.getNode());
    APInt SplatValue, SplatUndef;
    unsigned SplatBitSize;
    bool HasAnyUndefs;
    // Endianness doesn't matter in this context because we are looking for
    // an all-zero value.
    return Splat &&
           Splat->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
                                  HasAnyUndefs) &&
           SplatValue == 0;
  };

  if (N->getOpcode() == ISD::CONCAT_VECTORS) {
    // Combine this:
    //
    //   (concat_vectors (v2i32 (fp_to_{s,u}int_sat $x, 32)), (v2i32 (splat 0)))
    //
    // into (i32x4.trunc_sat_f64x2_zero_{s,u} $x).
    //
    // Or this:
    //
    //   (concat_vectors (v2f32 (fp_round (v2f64 $x))), (v2f32 (splat 0)))
    //
    // into (f32x4.demote_zero_f64x2 $x).
    EVT ResVT;
    EVT ExpectedConversionType;
    auto Conversion = N->getOperand(0);
    auto ConversionOp = Conversion.getOpcode();
    switch (ConversionOp) {
    case ISD::FP_TO_SINT_SAT:
    case ISD::FP_TO_UINT_SAT:
      ResVT = MVT::v4i32;
      ExpectedConversionType = MVT::v2i32;
      break;
    case ISD::FP_ROUND:
      ResVT = MVT::v4f32;
      ExpectedConversionType = MVT::v2f32;
      break;
    default:
      return SDValue();
    }

    // The concat must produce exactly the 128-bit result type of the
    // WebAssembly node.
    if (N->getValueType(0) != ResVT)
      return SDValue();

    if (Conversion.getValueType() != ExpectedConversionType)
      return SDValue();

    // The converted value must come from a two-lane f64 vector.
    auto Source = Conversion.getOperand(0);
    if (Source.getValueType() != MVT::v2f64)
      return SDValue();

    // The high half being concatenated in must be an all-zero splat of the
    // same narrow type.
    if (!IsZeroSplat(N->getOperand(1)) ||
        N->getOperand(1).getValueType() != ExpectedConversionType)
      return SDValue();

    unsigned Op = GetWasmConversionOp(ConversionOp);
    return DAG.getNode(Op, SDLoc(N), ResVT, Source);
  }

  // Combine this:
  //
  //   (fp_to_{s,u}int_sat (concat_vectors $x, (v2f64 (splat 0))), 32)
  //
  // into (i32x4.trunc_sat_f64x2_zero_{s,u} $x).
  //
  // Or this:
  //
  //   (v4f32 (fp_round (concat_vectors $x, (v2f64 (splat 0)))))
  //
  // into (f32x4.demote_zero_f64x2 $x).
  EVT ResVT;
  auto ConversionOp = N->getOpcode();
  switch (ConversionOp) {
  case ISD::FP_TO_SINT_SAT:
  case ISD::FP_TO_UINT_SAT:
    ResVT = MVT::v4i32;
    break;
  case ISD::FP_ROUND:
    ResVT = MVT::v4f32;
    break;
  default:
    // PerformDAGCombine only routes these three opcodes (plus
    // CONCAT_VECTORS, handled above) to this function.
    llvm_unreachable("unexpected op");
  }

  if (N->getValueType(0) != ResVT)
    return SDValue();

  // Here the conversion consumes the concat, so the concat must be a
  // four-lane f64 vector built from two v2f64 halves.
  auto Concat = N->getOperand(0);
  if (Concat.getValueType() != MVT::v4f64)
    return SDValue();

  auto Source = Concat.getOperand(0);
  if (Source.getValueType() != MVT::v2f64)
    return SDValue();

  // The upper half must be a v2f64 zero splat.
  if (!IsZeroSplat(Concat.getOperand(1)) ||
      Concat.getOperand(1).getValueType() != MVT::v2f64)
    return SDValue();

  unsigned Op = GetWasmConversionOp(ConversionOp);
  return DAG.getNode(Op, SDLoc(N), ResVT, Source);
}
3104 
3105 // Helper to extract VectorWidth bits from Vec, starting from IdxVal.
3106 static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG,
3107                                 const SDLoc &DL, unsigned VectorWidth) {
3108   EVT VT = Vec.getValueType();
3109   EVT ElVT = VT.getVectorElementType();
3110   unsigned Factor = VT.getSizeInBits() / VectorWidth;
3111   EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
3112                                   VT.getVectorNumElements() / Factor);
3113 
3114   // Extract the relevant VectorWidth bits.  Generate an EXTRACT_SUBVECTOR
3115   unsigned ElemsPerChunk = VectorWidth / ElVT.getSizeInBits();
3116   assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");
3117 
3118   // This is the index of the first element of the VectorWidth-bit chunk
3119   // we want. Since ElemsPerChunk is a power of 2 just need to clear bits.
3120   IdxVal &= ~(ElemsPerChunk - 1);
3121 
3122   // If the input is a buildvector just emit a smaller one.
3123   if (Vec.getOpcode() == ISD::BUILD_VECTOR)
3124     return DAG.getBuildVector(ResultVT, DL,
3125                               Vec->ops().slice(IdxVal, ElemsPerChunk));
3126 
3127   SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, DL);
3128   return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResultVT, Vec, VecIdx);
3129 }
3130 
// Helper to recursively truncate vector elements in half with NARROW_U. DstVT
// is the expected destination value type after recursion. In is the initial
// input. Note that the input should have enough leading zero bits to prevent
// NARROW_U from saturating results.
static SDValue truncateVectorWithNARROW(EVT DstVT, SDValue In, const SDLoc &DL,
                                        SelectionDAG &DAG) {
  EVT SrcVT = In.getValueType();

  // No truncation required, we might get here due to recursive calls.
  if (SrcVT == DstVT)
    return In;

  unsigned SrcSizeInBits = SrcVT.getSizeInBits();
  unsigned NumElems = SrcVT.getVectorNumElements();
  // Halving below relies on an even split of the lanes.
  if (!isPowerOf2_32(NumElems))
    return SDValue();
  assert(DstVT.getVectorNumElements() == NumElems && "Illegal truncation");
  assert(SrcSizeInBits > DstVT.getSizeInBits() && "Illegal truncation");

  LLVMContext &Ctx = *DAG.getContext();
  // Element type after one halving step; used to build the intermediate
  // vector types for the recursive case at the bottom.
  EVT PackedSVT = EVT::getIntegerVT(Ctx, SrcVT.getScalarSizeInBits() / 2);

  // Narrow to the largest type possible:
  // vXi64/vXi32 -> i16x8.narrow_i32x4_u and vXi16 -> i8x16.narrow_i16x8_u.
  EVT InVT = MVT::i16, OutVT = MVT::i8;
  if (SrcVT.getScalarSizeInBits() > 16) {
    InVT = MVT::i32;
    OutVT = MVT::i16;
  }
  // Build the 128-bit-wide input/output vector types for one NARROW_U step.
  unsigned SubSizeInBits = SrcSizeInBits / 2;
  InVT = EVT::getVectorVT(Ctx, InVT, SubSizeInBits / InVT.getSizeInBits());
  OutVT = EVT::getVectorVT(Ctx, OutVT, SubSizeInBits / OutVT.getSizeInBits());

  // Split lower/upper subvectors.
  SDValue Lo = extractSubVector(In, 0, DAG, DL, SubSizeInBits);
  SDValue Hi = extractSubVector(In, NumElems / 2, DAG, DL, SubSizeInBits);

  // 256bit -> 128bit truncate - Narrow lower/upper 128-bit subvectors.
  if (SrcVT.is256BitVector() && DstVT.is128BitVector()) {
    Lo = DAG.getBitcast(InVT, Lo);
    Hi = DAG.getBitcast(InVT, Hi);
    SDValue Res = DAG.getNode(WebAssemblyISD::NARROW_U, DL, OutVT, Lo, Hi);
    return DAG.getBitcast(DstVT, Res);
  }

  // Recursively narrow lower/upper subvectors, concat result and narrow again.
  EVT PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumElems / 2);
  Lo = truncateVectorWithNARROW(PackedVT, Lo, DL, DAG);
  Hi = truncateVectorWithNARROW(PackedVT, Hi, DL, DAG);

  PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumElems);
  SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, PackedVT, Lo, Hi);
  return truncateVectorWithNARROW(DstVT, Res, DL, DAG);
}
3185 
3186 static SDValue performTruncateCombine(SDNode *N,
3187                                       TargetLowering::DAGCombinerInfo &DCI) {
3188   auto &DAG = DCI.DAG;
3189 
3190   SDValue In = N->getOperand(0);
3191   EVT InVT = In.getValueType();
3192   if (!InVT.isSimple())
3193     return SDValue();
3194 
3195   EVT OutVT = N->getValueType(0);
3196   if (!OutVT.isVector())
3197     return SDValue();
3198 
3199   EVT OutSVT = OutVT.getVectorElementType();
3200   EVT InSVT = InVT.getVectorElementType();
3201   // Currently only cover truncate to v16i8 or v8i16.
3202   if (!((InSVT == MVT::i16 || InSVT == MVT::i32 || InSVT == MVT::i64) &&
3203         (OutSVT == MVT::i8 || OutSVT == MVT::i16) && OutVT.is128BitVector()))
3204     return SDValue();
3205 
3206   SDLoc DL(N);
3207   APInt Mask = APInt::getLowBitsSet(InVT.getScalarSizeInBits(),
3208                                     OutVT.getScalarSizeInBits());
3209   In = DAG.getNode(ISD::AND, DL, InVT, In, DAG.getConstant(Mask, DL, InVT));
3210   return truncateVectorWithNARROW(OutVT, In, DL, DAG);
3211 }
3212 
// Combine a bitcast of a fixed-length vNi1 vector to a scalar integer. Masks
// that fit in one 128-bit register (N = 2, 4, 8, 16) become a single
// wasm_bitmask intrinsic; for N = 32/64 a (bitcast (setcc (concat_vectors
// ...), v)) pattern is split into per-128-bit setcc+bitmask pieces that are
// recombined with shifts and adds.
static SDValue performBitcastCombine(SDNode *N,
                                     TargetLowering::DAGCombinerInfo &DCI) {
  using namespace llvm::SDPatternMatch;
  auto &DAG = DCI.DAG;
  SDLoc DL(N);
  SDValue Src = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT SrcVT = Src.getValueType();

  // Only pre-legalization bitcasts from a fixed-length i1 vector to a scalar
  // integer are handled.
  if (!(DCI.isBeforeLegalize() && VT.isScalarInteger() &&
        SrcVT.isFixedLengthVector() && SrcVT.getScalarType() == MVT::i1))
    return SDValue();

  // Lane width such that NumElts lanes of this width fill 128 bits.
  unsigned NumElts = SrcVT.getVectorNumElements();
  EVT Width = MVT::getIntegerVT(128 / NumElts);

  // bitcast <N x i1> to iN, where N = 2, 4, 8, 16 (legal)
  //   ==> bitmask
  if (NumElts == 2 || NumElts == 4 || NumElts == 8 || NumElts == 16) {
    // Sign-extend the i1 lanes to a full 128-bit vector, take the bitmask
    // (an i32), then zext/trunc to the bitcast's scalar result type.
    return DAG.getZExtOrTrunc(
        DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
                    {DAG.getConstant(Intrinsic::wasm_bitmask, DL, MVT::i32),
                     DAG.getSExtOrTrunc(N->getOperand(0), DL,
                                        SrcVT.changeVectorElementType(Width))}),
        DL, VT);
  }

  // bitcast <N x i1>(setcc ...) to concat iN, where N = 32 and 64 (illegal)
  if (NumElts == 32 || NumElts == 64) {
    // Strategy: setcc each 128-bit piece separately (v16i8 -> v16i1),
    // bitcast each result to i16, extend to i32 or i64, and accumulate the
    // pieces with shift-and-add.
    SDValue Concat, SetCCVector;
    ISD::CondCode SetCond;

    // One setcc operand must be a concat_vectors so it can be split.
    if (!sd_match(N, m_BitCast(m_c_SetCC(m_Value(Concat), m_Value(SetCCVector),
                                         m_CondCode(SetCond)))))
      return SDValue();
    if (Concat.getOpcode() != ISD::CONCAT_VECTORS)
      return SDValue();

    uint64_t ElementWidth =
        SetCCVector.getValueType().getVectorElementType().getFixedSizeInBits();

    // Compare each 128-bit operand of Concat against the matching 128-bit
    // subvector of the other setcc operand and collapse each v16i1 result to
    // an i16 (the v16i1 bitcast is itself combined by the NumElts == 16 case
    // above).
    SmallVector<SDValue> VectorsToShuffle;
    for (size_t I = 0; I < Concat->ops().size(); I++) {
      VectorsToShuffle.push_back(DAG.getBitcast(
          MVT::i16,
          DAG.getSetCC(DL, MVT::v16i1, Concat->ops()[I],
                       extractSubVector(SetCCVector, I * (128 / ElementWidth),
                                        DAG, DL, 128),
                       SetCond)));
    }

    // Two i16 pieces fold into an i32; four fold into an i64.
    MVT ReturnType = VectorsToShuffle.size() == 2 ? MVT::i32 : MVT::i64;
    SDValue ReturningInteger = DAG.getConstant(0, DL, ReturnType);

    // NOTE(review): ISD::SHL takes the value as operand 0 and the shift
    // amount as operand 1, but here the amount constant is passed first —
    // verify the intended operand order (and the resulting placement of each
    // 16-bit piece) against the v32i1/v64i1 bitcast tests.
    for (SDValue V : VectorsToShuffle) {
      ReturningInteger = DAG.getNode(
          ISD::SHL, DL, ReturnType,
          {DAG.getShiftAmountConstant(16, ReturnType, DL), ReturningInteger});

      SDValue ExtendedV = DAG.getZExtOrTrunc(V, DL, ReturnType);
      ReturningInteger =
          DAG.getNode(ISD::ADD, DL, ReturnType, {ReturningInteger, ExtendedV});
    }

    return ReturningInteger;
  }

  return SDValue();
}
3285 
static SDValue performAnyAllCombine(SDNode *N, SelectionDAG &DAG) {
  // Fold an any_true/all_true of a comparison against zero into a single
  // any_true/all_true of the compared vector:
  // any_true (setcc <X>, 0, eq) => (not (all_true X))
  // all_true (setcc <X>, 0, eq) => (not (any_true X))
  // any_true (setcc <X>, 0, ne) => (any_true X)
  // all_true (setcc <X>, 0, ne) => (all_true X)
  assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN);
  using namespace llvm::SDPatternMatch;

  // Cheap pre-filter: the intrinsic argument must be a setcc against zero
  // (any condition code), and the compared vector's lanes must be narrow
  // enough that the whole vector fits in 128 bits.
  SDValue LHS;
  if (!sd_match(N->getOperand(1),
                m_c_SetCC(m_Value(LHS), m_Zero(), m_CondCode())))
    return SDValue();
  EVT LT = LHS.getValueType();
  if (LT.getScalarSizeInBits() > 128 / LT.getVectorNumElements())
    return SDValue();

  // Rewrites one (intrinsic, condition) pairing: when N is intrinsic InPre
  // applied to a setcc-against-zero with condition SetType, emit InPost on
  // the raw vector, inverting the i1 result for the SETEQ forms.
  auto CombineSetCC = [&N, &DAG](Intrinsic::WASMIntrinsics InPre,
                                 ISD::CondCode SetType,
                                 Intrinsic::WASMIntrinsics InPost) {
    if (N->getConstantOperandVal(0) != InPre)
      return SDValue();

    SDValue LHS;
    if (!sd_match(N->getOperand(1), m_c_SetCC(m_Value(LHS), m_Zero(),
                                              m_SpecificCondCode(SetType))))
      return SDValue();

    SDLoc DL(N);
    SDValue Ret = DAG.getZExtOrTrunc(
        DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
                    {DAG.getConstant(InPost, DL, MVT::i32), LHS}),
        DL, MVT::i1);
    if (SetType == ISD::SETEQ)
      Ret = DAG.getNOT(DL, Ret, MVT::i1);
    return DAG.getZExtOrTrunc(Ret, DL, N->getValueType(0));
  };

  // Try all four (intrinsic, condition) combinations listed above.
  if (SDValue AnyTrueEQ = CombineSetCC(Intrinsic::wasm_anytrue, ISD::SETEQ,
                                       Intrinsic::wasm_alltrue))
    return AnyTrueEQ;
  if (SDValue AllTrueEQ = CombineSetCC(Intrinsic::wasm_alltrue, ISD::SETEQ,
                                       Intrinsic::wasm_anytrue))
    return AllTrueEQ;
  if (SDValue AnyTrueNE = CombineSetCC(Intrinsic::wasm_anytrue, ISD::SETNE,
                                       Intrinsic::wasm_anytrue))
    return AnyTrueNE;
  if (SDValue AllTrueNE = CombineSetCC(Intrinsic::wasm_alltrue, ISD::SETNE,
                                       Intrinsic::wasm_alltrue))
    return AllTrueNE;

  return SDValue();
}
3338 
// Match (setcc (iN (bitcast (vNi1 X))), MatchRHS, MatchCond) and rewrite it
// as intrinsic Intrin (any_true/all_true) applied to X sign-extended to
// VecVT, inverting the i1 result when RequiresNegate is set.
// NOTE(review): the casts below are unchecked — this assumes the caller has
// already verified that N's operand 0 is a BITCAST (so getOperand(0) of it is
// the mask vector) and that N's operand 1 is a ConstantSDNode, as
// performSETCCCombine does.
template <int MatchRHS, ISD::CondCode MatchCond, bool RequiresNegate,
          Intrinsic::ID Intrin>
static SDValue TryMatchTrue(SDNode *N, EVT VecVT, SelectionDAG &DAG) {
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  SDValue Cond = N->getOperand(2);
  // Bail out when the condition code or the RHS constant doesn't match the
  // pattern this instantiation looks for.
  if (MatchCond != cast<CondCodeSDNode>(Cond)->get())
    return SDValue();

  if (MatchRHS != cast<ConstantSDNode>(RHS)->getSExtValue())
    return SDValue();

  SDLoc DL(N);
  // Widen the i1 lanes to a 128-bit vector for the intrinsic, then reduce
  // the i32 intrinsic result to an i1 before any negation.
  SDValue Ret = DAG.getZExtOrTrunc(
      DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
                  {DAG.getConstant(Intrin, DL, MVT::i32),
                   DAG.getSExtOrTrunc(LHS->getOperand(0), DL, VecVT)}),
      DL, MVT::i1);
  if (RequiresNegate)
    Ret = DAG.getNOT(DL, Ret, MVT::i1);
  return DAG.getZExtOrTrunc(Ret, DL, N->getValueType(0));
}
3361 
3362 static SDValue performSETCCCombine(SDNode *N,
3363                                    TargetLowering::DAGCombinerInfo &DCI) {
3364   if (!DCI.isBeforeLegalize())
3365     return SDValue();
3366 
3367   EVT VT = N->getValueType(0);
3368   if (!VT.isScalarInteger())
3369     return SDValue();
3370 
3371   SDValue LHS = N->getOperand(0);
3372   if (LHS->getOpcode() != ISD::BITCAST)
3373     return SDValue();
3374 
3375   EVT FromVT = LHS->getOperand(0).getValueType();
3376   if (!FromVT.isFixedLengthVector() || FromVT.getVectorElementType() != MVT::i1)
3377     return SDValue();
3378 
3379   unsigned NumElts = FromVT.getVectorNumElements();
3380   if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
3381     return SDValue();
3382 
3383   if (!cast<ConstantSDNode>(N->getOperand(1)))
3384     return SDValue();
3385 
3386   EVT VecVT = FromVT.changeVectorElementType(MVT::getIntegerVT(128 / NumElts));
3387   auto &DAG = DCI.DAG;
3388   // setcc (iN (bitcast (vNi1 X))), 0, ne
3389   //   ==> any_true (vNi1 X)
3390   if (auto Match = TryMatchTrue<0, ISD::SETNE, false, Intrinsic::wasm_anytrue>(
3391           N, VecVT, DAG)) {
3392     return Match;
3393   }
3394   // setcc (iN (bitcast (vNi1 X))), 0, eq
3395   //   ==> xor (any_true (vNi1 X)), -1
3396   if (auto Match = TryMatchTrue<0, ISD::SETEQ, true, Intrinsic::wasm_anytrue>(
3397           N, VecVT, DAG)) {
3398     return Match;
3399   }
3400   // setcc (iN (bitcast (vNi1 X))), -1, eq
3401   //   ==> all_true (vNi1 X)
3402   if (auto Match = TryMatchTrue<-1, ISD::SETEQ, false, Intrinsic::wasm_alltrue>(
3403           N, VecVT, DAG)) {
3404     return Match;
3405   }
3406   // setcc (iN (bitcast (vNi1 X))), -1, ne
3407   //   ==> xor (all_true (vNi1 X)), -1
3408   if (auto Match = TryMatchTrue<-1, ISD::SETNE, true, Intrinsic::wasm_alltrue>(
3409           N, VecVT, DAG)) {
3410     return Match;
3411   }
3412   return SDValue();
3413 }
3414 
3415 static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG) {
3416   assert(N->getOpcode() == ISD::MUL);
3417   EVT VT = N->getValueType(0);
3418   if (VT != MVT::v8i32 && VT != MVT::v16i32)
3419     return SDValue();
3420 
3421   // Mul with extending inputs.
3422   SDValue LHS = N->getOperand(0);
3423   SDValue RHS = N->getOperand(1);
3424   if (LHS.getOpcode() != RHS.getOpcode())
3425     return SDValue();
3426 
3427   if (LHS.getOpcode() != ISD::SIGN_EXTEND &&
3428       LHS.getOpcode() != ISD::ZERO_EXTEND)
3429     return SDValue();
3430 
3431   if (LHS->getOperand(0).getValueType() != RHS->getOperand(0).getValueType())
3432     return SDValue();
3433 
3434   EVT FromVT = LHS->getOperand(0).getValueType();
3435   EVT EltTy = FromVT.getVectorElementType();
3436   if (EltTy != MVT::i8)
3437     return SDValue();
3438 
3439   // For an input DAG that looks like this
3440   // %a = input_type
3441   // %b = input_type
3442   // %lhs = extend %a to output_type
3443   // %rhs = extend %b to output_type
3444   // %mul = mul %lhs, %rhs
3445 
3446   // input_type | output_type | instructions
3447   // v16i8      | v16i32      | %low = i16x8.extmul_low_i8x16_ %a, %b
3448   //            |             | %high = i16x8.extmul_high_i8x16_, %a, %b
3449   //            |             | %low_low = i32x4.ext_low_i16x8_ %low
3450   //            |             | %low_high = i32x4.ext_high_i16x8_ %low
3451   //            |             | %high_low = i32x4.ext_low_i16x8_ %high
3452   //            |             | %high_high = i32x4.ext_high_i16x8_ %high
3453   //            |             | %res = concat_vector(...)
3454   // v8i8       | v8i32       | %low = i16x8.extmul_low_i8x16_ %a, %b
3455   //            |             | %low_low = i32x4.ext_low_i16x8_ %low
3456   //            |             | %low_high = i32x4.ext_high_i16x8_ %low
3457   //            |             | %res = concat_vector(%low_low, %low_high)
3458 
3459   SDLoc DL(N);
3460   unsigned NumElts = VT.getVectorNumElements();
3461   SDValue ExtendInLHS = LHS->getOperand(0);
3462   SDValue ExtendInRHS = RHS->getOperand(0);
3463   bool IsSigned = LHS->getOpcode() == ISD::SIGN_EXTEND;
3464   unsigned ExtendLowOpc =
3465       IsSigned ? WebAssemblyISD::EXTEND_LOW_S : WebAssemblyISD::EXTEND_LOW_U;
3466   unsigned ExtendHighOpc =
3467       IsSigned ? WebAssemblyISD::EXTEND_HIGH_S : WebAssemblyISD::EXTEND_HIGH_U;
3468 
3469   auto GetExtendLow = [&DAG, &DL, &ExtendLowOpc](EVT VT, SDValue Op) {
3470     return DAG.getNode(ExtendLowOpc, DL, VT, Op);
3471   };
3472   auto GetExtendHigh = [&DAG, &DL, &ExtendHighOpc](EVT VT, SDValue Op) {
3473     return DAG.getNode(ExtendHighOpc, DL, VT, Op);
3474   };
3475 
3476   if (NumElts == 16) {
3477     SDValue LowLHS = GetExtendLow(MVT::v8i16, ExtendInLHS);
3478     SDValue LowRHS = GetExtendLow(MVT::v8i16, ExtendInRHS);
3479     SDValue MulLow = DAG.getNode(ISD::MUL, DL, MVT::v8i16, LowLHS, LowRHS);
3480     SDValue HighLHS = GetExtendHigh(MVT::v8i16, ExtendInLHS);
3481     SDValue HighRHS = GetExtendHigh(MVT::v8i16, ExtendInRHS);
3482     SDValue MulHigh = DAG.getNode(ISD::MUL, DL, MVT::v8i16, HighLHS, HighRHS);
3483     SDValue SubVectors[] = {
3484         GetExtendLow(MVT::v4i32, MulLow),
3485         GetExtendHigh(MVT::v4i32, MulLow),
3486         GetExtendLow(MVT::v4i32, MulHigh),
3487         GetExtendHigh(MVT::v4i32, MulHigh),
3488     };
3489     return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, SubVectors);
3490   } else {
3491     assert(NumElts == 8);
3492     SDValue LowLHS = DAG.getNode(LHS->getOpcode(), DL, MVT::v8i16, ExtendInLHS);
3493     SDValue LowRHS = DAG.getNode(RHS->getOpcode(), DL, MVT::v8i16, ExtendInRHS);
3494     SDValue MulLow = DAG.getNode(ISD::MUL, DL, MVT::v8i16, LowLHS, LowRHS);
3495     SDValue Lo = GetExtendLow(MVT::v4i32, MulLow);
3496     SDValue Hi = GetExtendHigh(MVT::v4i32, MulLow);
3497     return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
3498   }
3499   return SDValue();
3500 }
3501 
SDValue
WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
                                             DAGCombinerInfo &DCI) const {
  // Dispatch to the WebAssembly-specific combines defined above by opcode;
  // anything unhandled falls back to the generic combiner via SDValue().
  switch (N->getOpcode()) {
  default:
    return SDValue();
  case ISD::BITCAST:
    return performBitcastCombine(N, DCI);
  case ISD::SETCC:
    return performSETCCCombine(N, DCI);
  case ISD::VECTOR_SHUFFLE:
    return performVECTOR_SHUFFLECombine(N, DCI);
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND:
    return performVectorExtendCombine(N, DCI);
  case ISD::UINT_TO_FP:
  case ISD::SINT_TO_FP:
    return performVectorExtendToFPCombine(N, DCI);
  case ISD::FP_TO_SINT_SAT:
  case ISD::FP_TO_UINT_SAT:
  case ISD::FP_ROUND:
  case ISD::CONCAT_VECTORS:
    return performVectorTruncZeroCombine(N, DCI);
  case ISD::TRUNCATE:
    return performTruncateCombine(N, DCI);
  case ISD::INTRINSIC_WO_CHAIN: {
    // Try the any_true/all_true fold first, then partial reductions.
    if (auto AnyAllCombine = performAnyAllCombine(N, DCI.DAG))
      return AnyAllCombine;
    return performLowerPartialReduction(N, DCI.DAG);
  }
  case ISD::MUL:
    return performMulCombine(N, DCI.DAG);
  }
}
3536