xref: /freebsd/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td (revision b64c5a0ace59af62eff52bfe110a521dc73c937b)
1// WebAssemblyInstrSIMD.td - WebAssembly SIMD codegen support -*- tablegen -*-//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// WebAssembly SIMD operand code-gen constructs.
11///
12//===----------------------------------------------------------------------===//
13
14// Instructions using the SIMD opcode prefix and requiring one of the SIMD
15// feature predicates.
multiclass ABSTRACT_SIMD_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s,
                           list<dag> pattern_r, string asmstr_r,
                           string asmstr_s, bits<32> simdop,
                           list<Predicate> reqs> {
  // The instruction encoding is the 0xfd prefix placed above the opcode bits:
  // opcodes of 0x100 and above keep their low 16 bits, smaller opcodes keep
  // only their low 8 bits.
  defvar wide_encoding = !or(0xfd0000, !and(0xffff, simdop));
  defvar narrow_encoding = !or(0xfd00, !and(0xff, simdop));
  defvar encoding = !if(!ge(simdop, 0x100), wide_encoding, narrow_encoding);

  // Register and stack forms both come from I; `reqs` gates the result.
  defm "" : I<oops_r, iops_r, oops_s, iops_s, pattern_r, asmstr_r, asmstr_s,
              encoding>,
            Requires<reqs>;
}
26
// SIMD instruction that always requires HasSIMD128, plus any extra predicates
// supplied through `reqs`.
multiclass SIMD_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s,
                  list<dag> pattern_r, string asmstr_r = "",
                  string asmstr_s = "", bits<32> simdop = -1,
                  list<Predicate> reqs = []> {
  // HasSIMD128 comes first, followed by the caller's predicates in order.
  defvar predicates = !listconcat([HasSIMD128], reqs);
  defm "" : ABSTRACT_SIMD_I<oops_r, iops_r, oops_s, iops_s, pattern_r,
                            asmstr_r, asmstr_s, simdop, predicates>;
}
34
// SIMD instruction gated only on HasRelaxedSIMD.
multiclass RELAXED_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s,
                     list<dag> pattern_r, string asmstr_r = "",
                     string asmstr_s = "", bits<32> simdop = -1> {
  defvar predicates = [HasRelaxedSIMD];
  defm "" : ABSTRACT_SIMD_I<oops_r, iops_r, oops_s, iops_s, pattern_r,
                            asmstr_r, asmstr_s, simdop, predicates>;
}
41
// SIMD instruction gated only on HasHalfPrecision.
multiclass HALF_PRECISION_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s,
                            list<dag> pattern_r, string asmstr_r = "",
                            string asmstr_s = "", bits<32> simdop = -1> {
  defvar predicates = [HasHalfPrecision];
  defm "" : ABSTRACT_SIMD_I<oops_r, iops_r, oops_s, iops_s, pattern_r,
                            asmstr_r, asmstr_s, simdop, predicates>;
}
48
49
// Instantiate ARGUMENT for V128 once per vector value type, in this order.
foreach vt = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, v8f16] in
defm "" : ARGUMENT<V128, vt>;
57
58// Constrained immediate argument types. Allow any value from the minimum signed
59// value to the maximum unsigned value for the lane size.
foreach SIZE = [8, 16] in {
  // -2^(n-1) <= Imm < 2^n
  defvar lower_bound = "-(1 << ("#SIZE#" - 1))";
  defvar upper_bound = "(1 << "#SIZE#")";
  def ImmI#SIZE : ImmLeaf<i32,
    "return "#lower_bound#" <= Imm && Imm < "#upper_bound#";"
  >;
}
// LaneIdx<N> accepts an immediate in the range [0, N).
foreach SIZE = [2, 4, 8, 16, 32] in {
  def LaneIdx#SIZE : ImmLeaf<i32, "return 0 <= Imm && Imm < "#SIZE#";">;
}
67
// Describes one lane interpretation of a 128-bit vector. The multiclasses and
// patterns in this file are parameterized over records of this class.
68class Vec {
69  ValueType vt;                 // Vector value type.
70  ValueType int_vt;             // Integer vector type with the same lane count.
71  ValueType lane_vt;            // Scalar type used for one lane in patterns.
72  WebAssemblyRegClass lane_rc;  // Register class holding a lane scalar.
73  int lane_bits;                // Width of one lane in bits.
74  ImmLeaf lane_idx;             // Immediate predicate for a valid lane index.
75  SDPatternOperator lane_load;  // Load operator producing a lane scalar.
76  PatFrag splat;                // Fragment matching a splat of this type.
77  string prefix;                // Assembly mnemonic prefix, e.g. "i8x16".
78  Vec split;                    // Vec with lanes half as wide; unset if none.
79}
80
// Sixteen 8-bit integer lanes. Lane scalars are i32 values in I32 registers;
// `split` is left unset.
81def I8x16 : Vec {
82  let vt = v16i8;
83  let int_vt = vt;
84  let lane_vt = i32;
85  let lane_rc = I32;
86  let lane_bits = 8;
87  let lane_idx = LaneIdx16;
88  let lane_load = extloadi8;
89  let splat = PatFrag<(ops node:$x), (v16i8 (splat_vector (i8 $x)))>;
90  let prefix = "i8x16";
91}
92
// Eight 16-bit integer lanes. Lane scalars are i32 values in I32 registers;
// splits into I8x16.
93def I16x8 : Vec {
94  let vt = v8i16;
95  let int_vt = vt;
96  let lane_vt = i32;
97  let lane_rc = I32;
98  let lane_bits = 16;
99  let lane_idx = LaneIdx8;
100  let lane_load = extloadi16;
101  let splat = PatFrag<(ops node:$x), (v8i16 (splat_vector (i16 $x)))>;
102  let prefix = "i16x8";
103  let split = I8x16;
104}
105
// Four 32-bit integer lanes; splits into I16x8.
106def I32x4 : Vec {
107  let vt = v4i32;
108  let int_vt = vt;
109  let lane_vt = i32;
110  let lane_rc = I32;
111  let lane_bits = 32;
112  let lane_idx = LaneIdx4;
113  let lane_load = load;
114  let splat = PatFrag<(ops node:$x), (v4i32 (splat_vector (i32 $x)))>;
115  let prefix = "i32x4";
116  let split = I16x8;
117}
118
// Two 64-bit integer lanes; splits into I32x4.
119def I64x2 : Vec {
120  let vt = v2i64;
121  let int_vt = vt;
122  let lane_vt = i64;
123  let lane_rc = I64;
124  let lane_bits = 64;
125  let lane_idx = LaneIdx2;
126  let lane_load = load;
127  let splat = PatFrag<(ops node:$x), (v2i64 (splat_vector (i64 $x)))>;
128  let prefix = "i64x2";
129  let split = I32x4;
130}
131
// Four 32-bit float lanes; the matching integer vector type is v4i32.
// `split` is left unset.
132def F32x4 : Vec {
133  let vt = v4f32;
134  let int_vt = v4i32;
135  let lane_vt = f32;
136  let lane_rc = F32;
137  let lane_bits = 32;
138  let lane_idx = LaneIdx4;
139  let lane_load = load;
140  let splat = PatFrag<(ops node:$x), (v4f32 (splat_vector (f32 $x)))>;
141  let prefix = "f32x4";
142}
143
// Two 64-bit float lanes; the matching integer vector type is v2i64.
// `split` is left unset.
144def F64x2 : Vec {
145  let vt = v2f64;
146  let int_vt = v2i64;
147  let lane_vt = f64;
148  let lane_rc = F64;
149  let lane_bits = 64;
150  let lane_idx = LaneIdx2;
151  let lane_load = load;
152  let splat = PatFrag<(ops node:$x), (v2f64 (splat_vector (f64 $x)))>;
153  let prefix = "f64x2";
154}
155
// Eight 16-bit float lanes; the matching integer vector type is v8i16. Lane
// scalars are f32 values in F32 registers, loaded via int_wasm_loadf16_f32,
// while the splat fragment matches an f16 operand. `split` is left unset.
156def F16x8 : Vec {
157 let vt = v8f16;
158 let int_vt = v8i16;
159 let lane_vt = f32;
160 let lane_rc = F32;
161 let lane_bits = 16;
162 let lane_idx = LaneIdx8;
163 let lane_load = int_wasm_loadf16_f32;
164 let splat = PatFrag<(ops node:$x), (v8f16 (splat_vector (f16 $x)))>;
165 let prefix = "f16x8";
166}
167
168// TODO: Include F16x8 here when half precision is better supported.
// AllVecs drives the per-type foreach loops below; its element order is the
// order in which those loops emit their records.
169defvar AllVecs = [I8x16, I16x8, I32x4, I64x2, F32x4, F64x2];
// The integer-lane subset of AllVecs, in the same order.
170defvar IntVecs = [I8x16, I16x8, I32x4, I64x2];
171
172//===----------------------------------------------------------------------===//
173// Load and store
174//===----------------------------------------------------------------------===//
175
176// Load: v128.load
// Two variants are defined: _A32 takes an I32 address with a 32-bit offset
// operand, _A64 takes an I64 address with a 64-bit offset operand. Both share
// SIMD opcode 0 and carry no selection pattern of their own.
177let mayLoad = 1, UseNamedOperandTable = 1 in {
178defm LOAD_V128_A32 :
179  SIMD_I<(outs V128:$dst), (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
180         (outs), (ins P2Align:$p2align, offset32_op:$off), [],
181         "v128.load\t$dst, ${off}(${addr})$p2align",
182         "v128.load\t$off$p2align", 0>;
183defm LOAD_V128_A64 :
184  SIMD_I<(outs V128:$dst), (ins P2Align:$p2align, offset64_op:$off, I64:$addr),
185         (outs), (ins P2Align:$p2align, offset64_op:$off), [],
186         "v128.load\t$dst, ${off}(${addr})$p2align",
187         "v128.load\t$off$p2align", 0>;
188}
189
190// Def load patterns from WebAssemblyInstrMemory.td for vector types
// One set of plain-load patterns per vector type, all selecting LOAD_V128.
foreach vec = AllVecs in
defm : LoadPat<vec.vt, load, "LOAD_V128">;
194
195// v128.loadX_splat
multiclass SIMDLoadSplat<int size, bits<32> simdop> {
  // Both address widths share one mnemonic and the same pair of asm strings.
  defvar mnemonic = "v128.load"#size#"_splat";
  defvar reg_asm = mnemonic#"\t$dst, ${off}(${addr})$p2align";
  defvar stack_asm = mnemonic#"\t$off$p2align";

  let mayLoad = 1, UseNamedOperandTable = 1 in {
    // 32-bit address variant.
    defm LOAD#size#_SPLAT_A32 :
      SIMD_I<(outs V128:$dst),
             (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
             (outs), (ins P2Align:$p2align, offset32_op:$off),
             [], reg_asm, stack_asm, simdop>;
    // 64-bit address variant.
    defm LOAD#size#_SPLAT_A64 :
      SIMD_I<(outs V128:$dst),
             (ins P2Align:$p2align, offset64_op:$off, I64:$addr),
             (outs), (ins P2Align:$p2align, offset64_op:$off),
             [], reg_asm, stack_asm, simdop>;
  } // mayLoad = 1, UseNamedOperandTable = 1
}

// <lane size in bits, SIMD opcode>
defm "" : SIMDLoadSplat<8, 7>;
defm "" : SIMDLoadSplat<16, 8>;
defm "" : SIMDLoadSplat<32, 9>;
defm "" : SIMDLoadSplat<64, 10>;
219
// Select a splat of a freshly loaded lane value as the load-splat instruction
// whose size matches the lane width.
foreach vec = AllVecs in {
  defvar splat_of_load =
    PatFrag<(ops node:$addr),
            (splat_vector (vec.lane_vt (vec.lane_load node:$addr)))>;
  defm : LoadPat<vec.vt, splat_of_load, "LOAD"#vec.lane_bits#"_SPLAT">;
}
226
227// Load and extend
multiclass SIMDLoadExtend<Vec vec, string loadPat, bits<32> simdop> {
  // Mnemonics differ only in the trailing _s / _u.
  defvar base = vec.prefix#".load"#loadPat;
  defvar s_reg_asm = base#"_s\t$dst, ${off}(${addr})$p2align";
  defvar s_stack_asm = base#"_s\t$off$p2align";
  defvar u_reg_asm = base#"_u\t$dst, ${off}(${addr})$p2align";
  defvar u_stack_asm = base#"_u\t$off$p2align";
  // The unsigned form uses the opcode directly after the signed one.
  defvar signed_op = simdop;
  defvar unsigned_op = !add(simdop, 1);

  let mayLoad = 1, UseNamedOperandTable = 1 in {
    // 32-bit addresses.
    defm LOAD_EXTEND_S_#vec#_A32 :
      SIMD_I<(outs V128:$dst),
             (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
             (outs), (ins P2Align:$p2align, offset32_op:$off), [],
             s_reg_asm, s_stack_asm, signed_op>;
    defm LOAD_EXTEND_U_#vec#_A32 :
      SIMD_I<(outs V128:$dst),
             (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
             (outs), (ins P2Align:$p2align, offset32_op:$off), [],
             u_reg_asm, u_stack_asm, unsigned_op>;
    // 64-bit addresses.
    defm LOAD_EXTEND_S_#vec#_A64 :
      SIMD_I<(outs V128:$dst),
             (ins P2Align:$p2align, offset64_op:$off, I64:$addr),
             (outs), (ins P2Align:$p2align, offset64_op:$off), [],
             s_reg_asm, s_stack_asm, signed_op>;
    defm LOAD_EXTEND_U_#vec#_A64 :
      SIMD_I<(outs V128:$dst),
             (ins P2Align:$p2align, offset64_op:$off, I64:$addr),
             (outs), (ins P2Align:$p2align, offset64_op:$off), [],
             u_reg_asm, u_stack_asm, unsigned_op>;
  } // mayLoad = 1, UseNamedOperandTable = 1
}

// <result type, source shape used in the mnemonic, opcode of the _s form>
defm "" : SIMDLoadExtend<I16x8, "8x8", 1>;
defm "" : SIMDLoadExtend<I32x4, "16x4", 3>;
defm "" : SIMDLoadExtend<I64x2, "32x2", 5>;
262
// Map sign-, zero- and any-extending vector loads onto the extending load
// instructions; any-extending loads use the unsigned form. The source lane
// width comes from the half-width `split` type.
foreach vec = [I16x8, I32x4, I64x2] in
foreach ext = [["sextloadvi", "_S"],
               ["zextloadvi", "_U"],
               ["extloadvi", "_U"]] in {
  defm : LoadPat<vec.vt,
                 !cast<PatFrag>(ext[0]#vec.split.lane_bits),
                 "LOAD_EXTEND"#ext[1]#"_"#vec>;
}
271
272// Load lane into zero vector
multiclass SIMDLoadZero<Vec vec, bits<32> simdop> {
  // Mnemonic is keyed on the lane width; both address widths share it.
  defvar mnemonic = "v128.load"#vec.lane_bits#"_zero";
  defvar reg_asm = mnemonic#"\t$dst, ${off}(${addr})$p2align";
  defvar stack_asm = mnemonic#"\t$off$p2align";

  let mayLoad = 1, UseNamedOperandTable = 1 in {
    // 32-bit address variant.
    defm LOAD_ZERO_#vec#_A32 :
      SIMD_I<(outs V128:$dst),
             (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
             (outs), (ins P2Align:$p2align, offset32_op:$off), [],
             reg_asm, stack_asm, simdop>;
    // 64-bit address variant.
    defm LOAD_ZERO_#vec#_A64 :
      SIMD_I<(outs V128:$dst),
             (ins P2Align:$p2align, offset64_op:$off, I64:$addr),
             (outs), (ins P2Align:$p2align, offset64_op:$off), [],
             reg_asm, stack_asm, simdop>;
  } // mayLoad = 1, UseNamedOperandTable = 1
}

defm "" : SIMDLoadZero<I32x4, 0x5c>;
defm "" : SIMDLoadZero<I64x2, 0x5d>;
293
294// Use load_zero to load scalars into vectors as well where possible.
295// TODO: i16, and i8 scalars
foreach vec = [I32x4, I64x2] in {
  // scalar_to_vector of a loaded lane value selects the load_zero instruction.
  defm : LoadPat<vec.vt,
                 PatFrag<(ops node:$addr),
                         (scalar_to_vector (vec.lane_vt (load $addr)))>,
                 "LOAD_ZERO_"#vec>;
}
301
302// TODO: f32x4 and f64x2 as well
foreach vec = [I32x4, I64x2] in {
  // Inserting a loaded value into lane 0 of an all-zero splat is also a
  // load_zero.
  defm : LoadPat<vec.vt,
                 PatFrag<(ops node:$ptr),
                         (vector_insert (vec.splat (vec.lane_vt 0)),
                                        (vec.lane_vt (load $ptr)), 0)>,
                 "LOAD_ZERO_"#vec>;
}
309
310// Load lane
multiclass SIMDLoadLane<Vec vec, bits<32> simdop> {
  // Mnemonic is keyed on the lane width; both address widths share it.
  defvar mnemonic = "v128.load"#vec.lane_bits#"_lane";
  defvar reg_asm = mnemonic#"\t$dst, ${off}(${addr})$p2align, $vec, $idx";
  defvar stack_asm = mnemonic#"\t$off$p2align, $idx";

  let mayLoad = 1, UseNamedOperandTable = 1 in {
    // 32-bit address variant.
    defm LOAD_LANE_#vec#_A32 :
      SIMD_I<(outs V128:$dst),
             (ins P2Align:$p2align, offset32_op:$off, vec_i8imm_op:$idx,
                  I32:$addr, V128:$vec),
             (outs),
             (ins P2Align:$p2align, offset32_op:$off, vec_i8imm_op:$idx),
             [], reg_asm, stack_asm, simdop>;
    // 64-bit address variant.
    defm LOAD_LANE_#vec#_A64 :
      SIMD_I<(outs V128:$dst),
             (ins P2Align:$p2align, offset64_op:$off, vec_i8imm_op:$idx,
                  I64:$addr, V128:$vec),
             (outs),
             (ins P2Align:$p2align, offset64_op:$off, vec_i8imm_op:$idx),
             [], reg_asm, stack_asm, simdop>;
  } // mayLoad = 1, UseNamedOperandTable = 1
}

defm "" : SIMDLoadLane<I8x16, 0x54>;
defm "" : SIMDLoadLane<I16x8, 0x55>;
defm "" : SIMDLoadLane<I32x4, 0x56>;
defm "" : SIMDLoadLane<I64x2, 0x57>;
335
336// Select loads with no constant offset.
multiclass LoadLanePatNoOffset<Vec vec, SDPatternOperator kind> {
  // 32-bit addresses: p2align and offset operands are both emitted as 0.
  def : Pat<(vec.vt (kind (i32 I32:$addr),
                          (vec.vt V128:$vec),
                          (i32 vec.lane_idx:$idx))),
            (!cast<NI>("LOAD_LANE_"#vec#"_A32") 0, 0, imm:$idx, $addr, $vec)>,
        Requires<[HasAddr32]>;
  // 64-bit addresses: same shape with an I64 address.
  def : Pat<(vec.vt (kind (i64 I64:$addr),
                          (vec.vt V128:$vec),
                          (i32 vec.lane_idx:$idx))),
            (!cast<NI>("LOAD_LANE_"#vec#"_A64") 0, 0, imm:$idx, $addr, $vec)>,
        Requires<[HasAddr64]>;
}
349
// Each fragment below matches a vector_insert whose inserted scalar comes
// straight from a load of the given width. Operands are (ptr, vec, idx).
// The 8- and 16-bit forms match an any-extending load producing an i32.
350def load8_lane :
351  PatFrag<(ops node:$ptr, node:$vec, node:$idx),
352          (vector_insert $vec, (i32 (extloadi8 $ptr)), $idx)>;
353def load16_lane :
354  PatFrag<(ops node:$ptr, node:$vec, node:$idx),
355          (vector_insert $vec, (i32 (extloadi16 $ptr)), $idx)>;
356def load32_lane :
357  PatFrag<(ops node:$ptr, node:$vec, node:$idx),
358          (vector_insert $vec, (i32 (load $ptr)), $idx)>;
359def load64_lane :
360  PatFrag<(ops node:$ptr, node:$vec, node:$idx),
361          (vector_insert $vec, (i64 (load $ptr)), $idx)>;
362// TODO: floating point lanes as well
363
// Pair each integer vector type with the fragment of matching lane width.
364defm : LoadLanePatNoOffset<I8x16, load8_lane>;
365defm : LoadLanePatNoOffset<I16x8, load16_lane>;
366defm : LoadLanePatNoOffset<I32x4, load32_lane>;
367defm : LoadLanePatNoOffset<I64x2, load64_lane>;
368
369// TODO: Also support the other load patterns for load_lane once the instructions
370// are merged to the proposal.
371
372// Store: v128.store
// Two variants are defined: _A32 takes an I32 address with a 32-bit offset
// operand, _A64 takes an I64 address with a 64-bit offset operand. Both share
// SIMD opcode 11, have no outputs, and carry no selection pattern of their own.
373let mayStore = 1, UseNamedOperandTable = 1 in {
374defm STORE_V128_A32 :
375  SIMD_I<(outs), (ins P2Align:$p2align, offset32_op:$off, I32:$addr, V128:$vec),
376         (outs), (ins P2Align:$p2align, offset32_op:$off), [],
377         "v128.store\t${off}(${addr})$p2align, $vec",
378         "v128.store\t$off$p2align", 11>;
379defm STORE_V128_A64 :
380  SIMD_I<(outs), (ins P2Align:$p2align, offset64_op:$off, I64:$addr, V128:$vec),
381         (outs), (ins P2Align:$p2align, offset64_op:$off), [],
382         "v128.store\t${off}(${addr})$p2align, $vec",
383         "v128.store\t$off$p2align", 11>;
384}
385
386// Def store patterns from WebAssemblyInstrMemory.td for vector types
// One set of plain-store patterns per vector type, all selecting STORE_V128.
foreach vec = AllVecs in
defm : StorePat<vec.vt, store, "STORE_V128">;
390
391// Store lane
multiclass SIMDStoreLane<Vec vec, bits<32> simdop> {
  // Mnemonic is keyed on the lane width; both address widths share it.
  defvar mnemonic = "v128.store"#vec.lane_bits#"_lane";
  defvar reg_asm = mnemonic#"\t${off}(${addr})$p2align, $vec, $idx";
  defvar stack_asm = mnemonic#"\t$off$p2align, $idx";

  let mayStore = 1, UseNamedOperandTable = 1 in {
    // 32-bit address variant.
    defm STORE_LANE_#vec#_A32 :
      SIMD_I<(outs),
             (ins P2Align:$p2align, offset32_op:$off, vec_i8imm_op:$idx,
                  I32:$addr, V128:$vec),
             (outs),
             (ins P2Align:$p2align, offset32_op:$off, vec_i8imm_op:$idx),
             [], reg_asm, stack_asm, simdop>;
    // 64-bit address variant.
    defm STORE_LANE_#vec#_A64 :
      SIMD_I<(outs),
             (ins P2Align:$p2align, offset64_op:$off, vec_i8imm_op:$idx,
                  I64:$addr, V128:$vec),
             (outs),
             (ins P2Align:$p2align, offset64_op:$off, vec_i8imm_op:$idx),
             [], reg_asm, stack_asm, simdop>;
  } // mayStore = 1, UseNamedOperandTable = 1
}

defm "" : SIMDStoreLane<I8x16, 0x58>;
defm "" : SIMDStoreLane<I16x8, 0x59>;
defm "" : SIMDStoreLane<I32x4, 0x5a>;
defm "" : SIMDStoreLane<I64x2, 0x5b>;
416
// Select `kind` (address, vector, lane index) as the store-lane instruction
// for `vec`, folding the matched offset into the instruction's offset operand.
multiclass StoreLanePat<Vec vec, SDPatternOperator kind> {
  defvar store_lane_a32 = !cast<NI>("STORE_LANE_"#vec#"_A32");
  defvar store_lane_a64 = !cast<NI>("STORE_LANE_"#vec#"_A64");
  // 32-bit addresses; the p2align operand is emitted as 0.
  def : Pat<(kind (AddrOps32 offset32_op:$offset, I32:$addr),
                  (vec.vt V128:$vec),
                  (i32 vec.lane_idx:$idx)),
            (store_lane_a32 0, $offset, imm:$idx, $addr, $vec)>,
        Requires<[HasAddr32]>;
  // 64-bit addresses.
  def : Pat<(kind (AddrOps64 offset64_op:$offset, I64:$addr),
                  (vec.vt V128:$vec),
                  (i32 vec.lane_idx:$idx)),
            (store_lane_a64 0, $offset, imm:$idx, $addr, $vec)>,
        Requires<[HasAddr64]>;
}
429
// Each fragment below matches a store whose stored scalar is a vector_extract
// of the given lane width. Operands are (ptr, vec, idx). The 8- and 16-bit
// forms match a truncating store of an extracted i32.
430def store8_lane :
431  PatFrag<(ops node:$ptr, node:$vec, node:$idx),
432          (truncstorei8 (i32 (vector_extract $vec, $idx)), $ptr)>;
433def store16_lane :
434  PatFrag<(ops node:$ptr, node:$vec, node:$idx),
435          (truncstorei16 (i32 (vector_extract $vec, $idx)), $ptr)>;
436def store32_lane :
437  PatFrag<(ops node:$ptr, node:$vec, node:$idx),
438          (store (i32 (vector_extract $vec, $idx)), $ptr)>;
439def store64_lane :
440  PatFrag<(ops node:$ptr, node:$vec, node:$idx),
441          (store (i64 (vector_extract $vec, $idx)), $ptr)>;
442// TODO: floating point lanes as well
443
// AddedComplexity = 1 raises the priority of these store-lane patterns;
// presumably so they win over a separate extract_lane followed by a scalar
// store (TODO confirm).
444let AddedComplexity = 1 in {
445defm : StoreLanePat<I8x16, store8_lane>;
446defm : StoreLanePat<I16x8, store16_lane>;
447defm : StoreLanePat<I32x4, store32_lane>;
448defm : StoreLanePat<I64x2, store64_lane>;
449}
450
451//===----------------------------------------------------------------------===//
452// Constructing SIMD values
453//===----------------------------------------------------------------------===//
454
455// Constant: v128.const
// Defines CONST_V128_<vec>: `ops` are the immediate operands (used for both
// the register and stack forms), `pat` is the build_vector to match and `args`
// is the operand list as it appears in the assembly string.
multiclass ConstVec<Vec vec, dag ops, dag pat, string args> {
  defvar mnemonic = "v128.const\t";
  defvar reg_asm = mnemonic#"$dst, "#args;
  defvar stack_asm = mnemonic#args;
  let isMoveImm = 1, isReMaterializable = 1 in
  defm CONST_V128_#vec : SIMD_I<(outs V128:$dst), ops, (outs), ops,
                                 [(set V128:$dst, (vec.vt pat))],
                                 reg_asm, stack_asm, 12>;
}
463
// 8-bit lanes: each immediate must satisfy ImmI8.
464defm "" : ConstVec<I8x16,
465                   (ins vec_i8imm_op:$i0, vec_i8imm_op:$i1,
466                        vec_i8imm_op:$i2, vec_i8imm_op:$i3,
467                        vec_i8imm_op:$i4, vec_i8imm_op:$i5,
468                        vec_i8imm_op:$i6, vec_i8imm_op:$i7,
469                        vec_i8imm_op:$i8, vec_i8imm_op:$i9,
470                        vec_i8imm_op:$iA, vec_i8imm_op:$iB,
471                        vec_i8imm_op:$iC, vec_i8imm_op:$iD,
472                        vec_i8imm_op:$iE, vec_i8imm_op:$iF),
473                   (build_vector ImmI8:$i0, ImmI8:$i1, ImmI8:$i2, ImmI8:$i3,
474                                 ImmI8:$i4, ImmI8:$i5, ImmI8:$i6, ImmI8:$i7,
475                                 ImmI8:$i8, ImmI8:$i9, ImmI8:$iA, ImmI8:$iB,
476                                 ImmI8:$iC, ImmI8:$iD, ImmI8:$iE, ImmI8:$iF),
477                   !strconcat("$i0, $i1, $i2, $i3, $i4, $i5, $i6, $i7, ",
478                              "$i8, $i9, $iA, $iB, $iC, $iD, $iE, $iF")>;
// 16-bit lanes: each immediate must satisfy ImmI16.
479defm "" : ConstVec<I16x8,
480                   (ins vec_i16imm_op:$i0, vec_i16imm_op:$i1,
481                        vec_i16imm_op:$i2, vec_i16imm_op:$i3,
482                        vec_i16imm_op:$i4, vec_i16imm_op:$i5,
483                        vec_i16imm_op:$i6, vec_i16imm_op:$i7),
484                   (build_vector
485                     ImmI16:$i0, ImmI16:$i1, ImmI16:$i2, ImmI16:$i3,
486                     ImmI16:$i4, ImmI16:$i5, ImmI16:$i6, ImmI16:$i7),
487                   "$i0, $i1, $i2, $i3, $i4, $i5, $i6, $i7">;
// 32-bit lanes: the only v128.const form marked IsCanonical.
488let IsCanonical = 1 in
489defm "" : ConstVec<I32x4,
490                   (ins vec_i32imm_op:$i0, vec_i32imm_op:$i1,
491                        vec_i32imm_op:$i2, vec_i32imm_op:$i3),
492                   (build_vector (i32 imm:$i0), (i32 imm:$i1),
493                                 (i32 imm:$i2), (i32 imm:$i3)),
494                   "$i0, $i1, $i2, $i3">;
// 64-bit integer lanes.
495defm "" : ConstVec<I64x2,
496                   (ins vec_i64imm_op:$i0, vec_i64imm_op:$i1),
497                   (build_vector (i64 imm:$i0), (i64 imm:$i1)),
498                   "$i0, $i1">;
// Floating-point lanes match fpimm rather than imm.
499defm "" : ConstVec<F32x4,
500                   (ins f32imm_op:$i0, f32imm_op:$i1,
501                        f32imm_op:$i2, f32imm_op:$i3),
502                   (build_vector (f32 fpimm:$i0), (f32 fpimm:$i1),
503                                 (f32 fpimm:$i2), (f32 fpimm:$i3)),
504                   "$i0, $i1, $i2, $i3">;
505defm "" : ConstVec<F64x2,
506                  (ins f64imm_op:$i0, f64imm_op:$i1),
507                  (build_vector (f64 fpimm:$i0), (f64 fpimm:$i1)),
508                  "$i0, $i1">;
509
510// Match splat(x) -> const.v128(x, ..., x)
foreach vec = AllVecs in {
  // Number of lanes = total vector width / lane width.
  defvar lane_count = !div(vec.vt.Size, vec.lane_bits);
  // Float lanes match fpimm; integer lanes match imm.
  defvar imm_node = !if(!or(!eq(vec.lane_vt, f32), !eq(vec.lane_vt, f64)),
                        fpimm, imm);
  defvar const_inst = !cast<NI>("CONST_V128_"#vec);
  // The output repeats the matched immediate once per lane.
  def : Pat<(vec.splat (vec.lane_vt imm_node:$x)),
            !dag(const_inst,
                 !listsplat((vec.lane_vt imm_node:$x), lane_count),
                 ?)>;
}
520
521// Shuffle lanes: shuffle
// The register form takes two vectors plus sixteen 8-bit mask immediates
// ($m0..$mF); the stack form takes only the immediates. No pattern is attached
// here. SIMD opcode 13.
522defm SHUFFLE :
523  SIMD_I<(outs V128:$dst),
524         (ins V128:$x, V128:$y,
525           vec_i8imm_op:$m0, vec_i8imm_op:$m1,
526           vec_i8imm_op:$m2, vec_i8imm_op:$m3,
527           vec_i8imm_op:$m4, vec_i8imm_op:$m5,
528           vec_i8imm_op:$m6, vec_i8imm_op:$m7,
529           vec_i8imm_op:$m8, vec_i8imm_op:$m9,
530           vec_i8imm_op:$mA, vec_i8imm_op:$mB,
531           vec_i8imm_op:$mC, vec_i8imm_op:$mD,
532           vec_i8imm_op:$mE, vec_i8imm_op:$mF),
533         (outs),
534         (ins
535           vec_i8imm_op:$m0, vec_i8imm_op:$m1,
536           vec_i8imm_op:$m2, vec_i8imm_op:$m3,
537           vec_i8imm_op:$m4, vec_i8imm_op:$m5,
538           vec_i8imm_op:$m6, vec_i8imm_op:$m7,
539           vec_i8imm_op:$m8, vec_i8imm_op:$m9,
540           vec_i8imm_op:$mA, vec_i8imm_op:$mB,
541           vec_i8imm_op:$mC, vec_i8imm_op:$mD,
542           vec_i8imm_op:$mE, vec_i8imm_op:$mF),
543         [],
544         "i8x16.shuffle\t$dst, $x, $y, "#
545           "$m0, $m1, $m2, $m3, $m4, $m5, $m6, $m7, "#
546           "$m8, $m9, $mA, $mB, $mC, $mD, $mE, $mF",
547         "i8x16.shuffle\t"#
548           "$m0, $m1, $m2, $m3, $m4, $m5, $m6, $m7, "#
549           "$m8, $m9, $mA, $mB, $mC, $mD, $mE, $mF",
550         13>;
551
552// Shuffles after custom lowering
// One result and 18 operands (two vectors plus sixteen mask indices), with no
// type constraints.
553def wasm_shuffle_t : SDTypeProfile<1, 18, []>;
554def wasm_shuffle : SDNode<"WebAssemblyISD::SHUFFLE", wasm_shuffle_t>;
// Two patterns per vector type, both selecting SHUFFLE: one for mask operands
// that are target constants (timm), one for ordinary constants in [0, 32).
555foreach vec = AllVecs in {
556// The @llvm.wasm.shuffle intrinsic has immediate arguments that become TargetConstants.
557def : Pat<(vec.vt (wasm_shuffle (vec.vt V128:$x), (vec.vt V128:$y),
558            (i32 timm:$m0), (i32 timm:$m1),
559            (i32 timm:$m2), (i32 timm:$m3),
560            (i32 timm:$m4), (i32 timm:$m5),
561            (i32 timm:$m6), (i32 timm:$m7),
562            (i32 timm:$m8), (i32 timm:$m9),
563            (i32 timm:$mA), (i32 timm:$mB),
564            (i32 timm:$mC), (i32 timm:$mD),
565            (i32 timm:$mE), (i32 timm:$mF))),
566          (SHUFFLE $x, $y,
567            imm:$m0, imm:$m1, imm:$m2, imm:$m3,
568            imm:$m4, imm:$m5, imm:$m6, imm:$m7,
569            imm:$m8, imm:$m9, imm:$mA, imm:$mB,
570            imm:$mC, imm:$mD, imm:$mE, imm:$mF)>;
571// Normal shufflevector instructions may have normal constant arguments.
572def : Pat<(vec.vt (wasm_shuffle (vec.vt V128:$x), (vec.vt V128:$y),
573            (i32 LaneIdx32:$m0), (i32 LaneIdx32:$m1),
574            (i32 LaneIdx32:$m2), (i32 LaneIdx32:$m3),
575            (i32 LaneIdx32:$m4), (i32 LaneIdx32:$m5),
576            (i32 LaneIdx32:$m6), (i32 LaneIdx32:$m7),
577            (i32 LaneIdx32:$m8), (i32 LaneIdx32:$m9),
578            (i32 LaneIdx32:$mA), (i32 LaneIdx32:$mB),
579            (i32 LaneIdx32:$mC), (i32 LaneIdx32:$mD),
580            (i32 LaneIdx32:$mE), (i32 LaneIdx32:$mF))),
581          (SHUFFLE $x, $y,
582            imm:$m0, imm:$m1, imm:$m2, imm:$m3,
583            imm:$m4, imm:$m5, imm:$m6, imm:$m7,
584            imm:$m8, imm:$m9, imm:$mA, imm:$mB,
585            imm:$mC, imm:$mD, imm:$mE, imm:$mF)>;
586}
587
588// Swizzle lanes: i8x16.swizzle
// One result and two operands (source vector and mask), with no type
// constraints.
589def wasm_swizzle_t : SDTypeProfile<1, 2, []>;
590def wasm_swizzle : SDNode<"WebAssemblyISD::SWIZZLE", wasm_swizzle_t>;
// The instruction itself matches the wasm_swizzle node on v16i8. SIMD opcode 14.
591defm SWIZZLE :
592  SIMD_I<(outs V128:$dst), (ins V128:$src, V128:$mask), (outs), (ins),
593         [(set (v16i8 V128:$dst),
594           (wasm_swizzle (v16i8 V128:$src), (v16i8 V128:$mask)))],
595         "i8x16.swizzle\t$dst, $src, $mask", "i8x16.swizzle", 14>;
596
// The int_wasm_swizzle intrinsic selects the same instruction.
597def : Pat<(int_wasm_swizzle (v16i8 V128:$src), (v16i8 V128:$mask)),
598          (SWIZZLE $src, $mask)>;
599
// Defines SPLAT_<vec>: broadcasts the scalar in a lane register to every lane,
// matching the type's `splat` fragment.
multiclass Splat<Vec vec, bits<32> simdop> {
  defvar mnemonic = vec.prefix#".splat";
  defm SPLAT_#vec :
    SIMD_I<(outs V128:$dst), (ins vec.lane_rc:$x), (outs), (ins),
           [(set (vec.vt V128:$dst), (vec.splat vec.lane_rc:$x))],
           mnemonic#"\t$dst, $x", mnemonic, simdop>;
}

defm "" : Splat<I8x16, 15>;
defm "" : Splat<I16x8, 16>;
defm "" : Splat<I32x4, 17>;
defm "" : Splat<I64x2, 18>;
defm "" : Splat<F32x4, 19>;
defm "" : Splat<F64x2, 20>;
615
616// Half values are not fully supported so an intrinsic is used instead of a
617// regular Splat pattern as above.
// Gated on HasHalfPrecision through HALF_PRECISION_I. The scalar operand is an
// F32 register and the result is typed v8f16. SIMD opcode 0x120.
618defm SPLAT_F16x8 :
619  HALF_PRECISION_I<(outs V128:$dst), (ins F32:$x),
620                   (outs), (ins),
621                   [(set (v8f16 V128:$dst), (int_wasm_splat_f16x8 F32:$x))],
622                   "f16x8.splat\t$dst, $x", "f16x8.splat", 0x120>;
623
624// scalar_to_vector leaves high lanes undefined, so can be a splat
foreach vec = AllVecs in {
  // Reuse the splat instruction of the same vector type.
  defvar splat_inst = !cast<Instruction>("SPLAT_"#vec);
  def : Pat<(vec.vt (scalar_to_vector (vec.lane_vt vec.lane_rc:$x))),
            (splat_inst $x)>;
}
628
629//===----------------------------------------------------------------------===//
630// Accessing lanes
631//===----------------------------------------------------------------------===//
632
633// Extract lane as a scalar: extract_lane / extract_lane_s / extract_lane_u
// Defines EXTRACT_LANE_<vec><suffix>. The optional suffix ("_s" / "_u") is
// appended to both the record name and the mnemonic. No pattern is attached.
multiclass ExtractLane<Vec vec, bits<32> simdop, string suffix = ""> {
  defvar mnemonic = vec.prefix#".extract_lane"#suffix;
  defm EXTRACT_LANE_#vec#suffix :
      SIMD_I<(outs vec.lane_rc:$dst), (ins V128:$vec, vec_i8imm_op:$idx),
             (outs), (ins vec_i8imm_op:$idx), [],
             mnemonic#"\t$dst, $vec, $idx", mnemonic#"\t$idx", simdop>;
}

// 8- and 16-bit lanes come in sign- and zero-extending forms.
defm "" : ExtractLane<I8x16, 21, "_s">;
defm "" : ExtractLane<I8x16, 22, "_u">;
defm "" : ExtractLane<I16x8, 24, "_s">;
defm "" : ExtractLane<I16x8, 25, "_u">;
defm "" : ExtractLane<I32x4, 27>;
defm "" : ExtractLane<I64x2, 29>;
defm "" : ExtractLane<F32x4, 31>;
defm "" : ExtractLane<F64x2, 33>;
650
// A plain vector_extract selects extract_lane; for 8- and 16-bit lanes the
// zero-extending (_u) form is used. The lane index must be in range for the
// vector type.
651def : Pat<(vector_extract (v16i8 V128:$vec), (i32 LaneIdx16:$idx)),
652          (EXTRACT_LANE_I8x16_u $vec, imm:$idx)>;
653def : Pat<(vector_extract (v8i16 V128:$vec), (i32 LaneIdx8:$idx)),
654          (EXTRACT_LANE_I16x8_u $vec, imm:$idx)>;
655def : Pat<(vector_extract (v4i32 V128:$vec), (i32 LaneIdx4:$idx)),
656          (EXTRACT_LANE_I32x4 $vec, imm:$idx)>;
657def : Pat<(vector_extract (v4f32 V128:$vec), (i32 LaneIdx4:$idx)),
658          (EXTRACT_LANE_F32x4 $vec, imm:$idx)>;
659def : Pat<(vector_extract (v2i64 V128:$vec), (i32 LaneIdx2:$idx)),
660          (EXTRACT_LANE_I64x2 $vec, imm:$idx)>;
661def : Pat<(vector_extract (v2f64 V128:$vec), (i32 LaneIdx2:$idx)),
662          (EXTRACT_LANE_F64x2 $vec, imm:$idx)>;
663
// Fold an explicit extension of the extracted narrow lane into the
// instruction: sext_inreg selects the _s form, masking with the lane's
// all-ones value selects the _u form.
664def : Pat<
665  (sext_inreg (vector_extract (v16i8 V128:$vec), (i32 LaneIdx16:$idx)), i8),
666  (EXTRACT_LANE_I8x16_s $vec, imm:$idx)>;
667def : Pat<
668  (and (vector_extract (v16i8 V128:$vec), (i32 LaneIdx16:$idx)), (i32 0xff)),
669  (EXTRACT_LANE_I8x16_u $vec, imm:$idx)>;
670def : Pat<
671  (sext_inreg (vector_extract (v8i16 V128:$vec), (i32 LaneIdx8:$idx)), i16),
672  (EXTRACT_LANE_I16x8_s $vec, imm:$idx)>;
673def : Pat<
674  (and (vector_extract (v8i16 V128:$vec), (i32 LaneIdx8:$idx)), (i32 0xffff)),
675  (EXTRACT_LANE_I16x8_u $vec, imm:$idx)>;
676
// f16x8.extract_lane: gated on HasHalfPrecision and selected from the
// int_wasm_extract_lane_f16x8 intrinsic; the extracted lane is returned in an
// F32 register. An f16x8 vector has eight lanes, so the index is constrained
// with LaneIdx8 (matching F16x8.lane_idx), not LaneIdx16. Indices 8..15 are
// out of range for this vector shape and must not be selected.
defm EXTRACT_LANE_F16x8 :
  HALF_PRECISION_I<(outs F32:$dst), (ins V128:$vec, vec_i8imm_op:$idx),
                   (outs), (ins vec_i8imm_op:$idx),
                   [(set (f32 F32:$dst), (int_wasm_extract_lane_f16x8
                    (v8f16 V128:$vec), (i32 LaneIdx8:$idx)))],
                   "f16x8.extract_lane\t$dst, $vec, $idx",
                   "f16x8.extract_lane\t$idx", 0x121>;
684
685// Replace lane value: replace_lane
// Defines REPLACE_LANE_<vec>: matches a vector_insert of a lane scalar at an
// in-range constant lane index.
multiclass ReplaceLane<Vec vec, bits<32> simdop> {
  defvar mnemonic = vec.prefix#".replace_lane";
  defm REPLACE_LANE_#vec :
    SIMD_I<(outs V128:$dst),
           (ins V128:$vec, vec_i8imm_op:$idx, vec.lane_rc:$x),
           (outs), (ins vec_i8imm_op:$idx),
           [(set V128:$dst,
                 (vector_insert (vec.vt V128:$vec),
                                (vec.lane_vt vec.lane_rc:$x),
                                (i32 vec.lane_idx:$idx)))],
           mnemonic#"\t$dst, $vec, $idx, $x", mnemonic#"\t$idx", simdop>;
}

defm "" : ReplaceLane<I8x16, 23>;
defm "" : ReplaceLane<I16x8, 26>;
defm "" : ReplaceLane<I32x4, 28>;
defm "" : ReplaceLane<I64x2, 30>;
defm "" : ReplaceLane<F32x4, 32>;
defm "" : ReplaceLane<F64x2, 34>;
704
705// Lower undef lane indices to zero
// AllVecs lists the types in the order these patterns were originally written
// (I8x16, I16x8, I32x4, I64x2, F32x4, F64x2), so pattern order is preserved.
foreach vec = AllVecs in {
  defvar replace_lane = !cast<Instruction>("REPLACE_LANE_"#vec);
  def : Pat<(vector_insert (vec.vt V128:$vec), vec.lane_rc:$x, undef),
            (replace_lane $vec, 0, $x)>;
}
718
719//===----------------------------------------------------------------------===//
720// Comparisons
721//===----------------------------------------------------------------------===//
722
// Defines <NAME>_<vec>: a lane-wise setcc with condition `cond` on two
// vectors of type vec.vt, producing the integer vector type vec.int_vt.
multiclass SIMDCondition<Vec vec, string name, CondCode cond, bits<32> simdop,
                         list<Predicate> reqs = []> {
  defvar mnemonic = vec.prefix#"."#name;
  defm _#vec :
    SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), (outs), (ins),
           [(set (vec.int_vt V128:$dst),
                 (setcc (vec.vt V128:$lhs), (vec.vt V128:$rhs), cond))],
           mnemonic#"\t$dst, $lhs, $rhs", mnemonic, simdop, reqs>;
}
732
// SIMDCondition with HasHalfPrecision added as an extra requirement.
multiclass HalfPrecisionCondition<Vec vec, string name, CondCode cond,
                                  bits<32> simdop> {
  defvar extra_reqs = [HasHalfPrecision];
  defm "" : SIMDCondition<vec, name, cond, simdop, extra_reqs>;
}
737
// Integer comparison for the 8-, 16- and 32-bit lane types. Opcodes for
// successive lane types are 10 apart, starting at baseInst for I8x16.
multiclass SIMDConditionInt<string name, CondCode cond, bits<32> baseInst> {
  defvar i8x16_op = baseInst;
  defvar i16x8_op = !add(baseInst, 10);
  defvar i32x4_op = !add(baseInst, 20);
  defm "" : SIMDCondition<I8x16, name, cond, i8x16_op>;
  defm "" : SIMDCondition<I16x8, name, cond, i16x8_op>;
  defm "" : SIMDCondition<I32x4, name, cond, i32x4_op>;
}
743
// Floating-point comparison for F32x4, F64x2 and (half-precision only) F16x8.
// The F64x2 opcode is baseInst + 6 and the F16x8 opcode is baseInst + 255.
multiclass SIMDConditionFP<string name, CondCode cond, bits<32> baseInst> {
  defvar f32x4_op = baseInst;
  defvar f64x2_op = !add(baseInst, 6);
  defvar f16x8_op = !add(baseInst, 255);
  defm "" : SIMDCondition<F32x4, name, cond, f32x4_op>;
  defm "" : SIMDCondition<F64x2, name, cond, f64x2_op>;
  defm "" : HalfPrecisionCondition<F16x8, name, cond, f16x8_op>;
}
749
750// Equality: eq
// Layout used for every comparison below: SIMDConditionInt covers I8x16,
// I16x8 and I32x4; I64x2 is instantiated separately with its own opcode; and
// SIMDConditionFP covers the float types with ordered condition codes
// (unordered SETUNE for ne). In this section I64x2 gets eq, ne and the signed
// orderings only. Only eq and ne are marked commutable.
751let isCommutable = 1 in {
752defm EQ : SIMDConditionInt<"eq", SETEQ, 35>;
753defm EQ : SIMDCondition<I64x2, "eq", SETEQ, 214>;
754defm EQ : SIMDConditionFP<"eq", SETOEQ, 65>;
755} // isCommutable = 1
756
757// Non-equality: ne
758let isCommutable = 1 in {
759defm NE : SIMDConditionInt<"ne", SETNE, 36>;
760defm NE : SIMDCondition<I64x2, "ne", SETNE, 215>;
761defm NE : SIMDConditionFP<"ne", SETUNE, 66>;
762} // isCommutable = 1
763
764// Less than: lt_s / lt_u / lt
765defm LT_S : SIMDConditionInt<"lt_s", SETLT, 37>;
766defm LT_S : SIMDCondition<I64x2, "lt_s", SETLT, 216>;
767defm LT_U : SIMDConditionInt<"lt_u", SETULT, 38>;
768defm LT : SIMDConditionFP<"lt", SETOLT, 67>;
769
770// Greater than: gt_s / gt_u / gt
771defm GT_S : SIMDConditionInt<"gt_s", SETGT, 39>;
772defm GT_S : SIMDCondition<I64x2, "gt_s", SETGT, 217>;
773defm GT_U : SIMDConditionInt<"gt_u", SETUGT, 40>;
774defm GT : SIMDConditionFP<"gt", SETOGT, 68>;
775
776// Less than or equal: le_s / le_u / le
777defm LE_S : SIMDConditionInt<"le_s", SETLE, 41>;
778defm LE_S : SIMDCondition<I64x2, "le_s", SETLE, 218>;
779defm LE_U : SIMDConditionInt<"le_u", SETULE, 42>;
780defm LE : SIMDConditionFP<"le", SETOLE, 69>;
781
782// Greater than or equal: ge_s / ge_u / ge
783defm GE_S : SIMDConditionInt<"ge_s", SETGE, 43>;
784defm GE_S : SIMDCondition<I64x2, "ge_s", SETGE, 219>;
785defm GE_U : SIMDConditionInt<"ge_u", SETUGE, 44>;
786defm GE : SIMDConditionFP<"ge", SETOGE, 70>;
787
788// Lower float comparisons that don't care about NaN to standard WebAssembly
789// float comparisons. These instructions are generated with nnan and in the
790// target-independent expansion of unordered comparisons and ordered ne.
// Each [node, instruction] pair below yields one pattern. Only F32x4 and
// F64x2 are handled by these two loops; no F16x8 equivalents are defined here.
791foreach nodes = [[seteq, EQ_F32x4], [setne, NE_F32x4], [setlt, LT_F32x4],
792                 [setgt, GT_F32x4], [setle, LE_F32x4], [setge, GE_F32x4]] in
793def : Pat<(v4i32 (nodes[0] (v4f32 V128:$lhs), (v4f32 V128:$rhs))),
794          (nodes[1] $lhs, $rhs)>;
795
796foreach nodes = [[seteq, EQ_F64x2], [setne, NE_F64x2], [setlt, LT_F64x2],
797                 [setgt, GT_F64x2], [setle, LE_F64x2], [setge, GE_F64x2]] in
798def : Pat<(v2i64 (nodes[0] (v2f64 V128:$lhs), (v2f64 V128:$rhs))),
799          (nodes[1] $lhs, $rhs)>;
800
801//===----------------------------------------------------------------------===//
802// Bitwise operations
803//===----------------------------------------------------------------------===//
804
// Defines a two-operand instruction for vector shape `vec` whose selection
// pattern is `node` applied to two vec.vt operands, producing a vec.vt
// result. The record name is the defm prefix followed by "_"#vec and the
// mnemonic is vec.prefix#"."#name. `simdop` and `reqs` are forwarded to
// SIMD_I unchanged.
805multiclass SIMDBinary<Vec vec, SDPatternOperator node, string name,
806                      bits<32> simdop, list<Predicate> reqs = []> {
807  defm _#vec : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs),
808                      (outs), (ins),
809                      [(set (vec.vt V128:$dst),
810                        (node (vec.vt V128:$lhs), (vec.vt V128:$rhs)))],
811                      vec.prefix#"."#name#"\t$dst, $lhs, $rhs",
812                      vec.prefix#"."#name, simdop, reqs>;
813}
814
// Same as SIMDBinary, but additionally gated on the HasHalfPrecision
// predicate.
815multiclass HalfPrecisionBinary<Vec vec, SDPatternOperator node, string name,
816                               bits<32> simdop> {
817  defm "" : SIMDBinary<vec, node, name, simdop, [HasHalfPrecision]>;
818}
819
// Defines one shape-agnostic "v128.<name>" instruction plus the patterns that
// select it. The instruction itself carries no pattern (empty list); instead a
// separate Pat is emitted for every shape in IntVecs, all mapping `node` onto
// the single instruction, which is looked up by the defm prefix (NAME).
//   commutable - value assigned to isCommutable on the instruction.
820multiclass SIMDBitwise<SDPatternOperator node, string name, bits<32> simdop,
821                       bit commutable = false> {
822  let isCommutable = commutable in
823  defm "" : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs),
824                   (outs), (ins), [],
825                   "v128."#name#"\t$dst, $lhs, $rhs", "v128."#name, simdop>;
826  foreach vec = IntVecs in
827  def : Pat<(node (vec.vt V128:$lhs), (vec.vt V128:$rhs)),
828            (!cast<NI>(NAME) $lhs, $rhs)>;
829}
830
// Defines a one-operand instruction for vector shape `vec` whose selection
// pattern is `node` applied to a vec.vt operand, producing a vec.vt result.
// The record name is the defm prefix followed by "_"#vec and the mnemonic is
// vec.prefix#"."#name. `simdop` and `reqs` are forwarded to SIMD_I unchanged.
831multiclass SIMDUnary<Vec vec, SDPatternOperator node, string name,
832                     bits<32> simdop, list<Predicate> reqs = []> {
833  defm _#vec : SIMD_I<(outs V128:$dst), (ins V128:$v), (outs), (ins),
834                      [(set (vec.vt V128:$dst),
835                        (vec.vt (node (vec.vt V128:$v))))],
836                      vec.prefix#"."#name#"\t$dst, $v",
837                      vec.prefix#"."#name, simdop, reqs>;
838}
839
// Same as SIMDUnary, but additionally gated on the HasHalfPrecision predicate.
840multiclass HalfPrecisionUnary<Vec vec, SDPatternOperator node, string name,
841                              bits<32> simdop> {
842  defm "" : SIMDUnary<vec, node, name, simdop, [HasHalfPrecision]>;
843}
844
845// Bitwise logic: v128.not
// The instruction is defined without a pattern; one `vnot` pattern per shape
// in IntVecs selects it.
846defm NOT : SIMD_I<(outs V128:$dst), (ins V128:$v), (outs), (ins), [],
847                  "v128.not\t$dst, $v", "v128.not", 77>;
848foreach vec = IntVecs in
849def : Pat<(vnot (vec.vt V128:$v)), (NOT $v)>;
850
851// Bitwise logic: v128.and / v128.or / v128.xor
// All three are marked commutable (fourth template argument).
852defm AND : SIMDBitwise<and, "and", 78, true>;
853defm OR : SIMDBitwise<or, "or", 80, true>;
854defm XOR : SIMDBitwise<xor, "xor", 81, true>;
855
856// Bitwise logic: v128.andnot
// andnot(left, right) matches `left & ~right`. It is left non-commutable
// (SIMDBitwise's default) because the operands are not interchangeable.
857def andnot : PatFrag<(ops node:$left, node:$right), (and $left, (vnot $right))>;
858defm ANDNOT : SIMDBitwise<andnot, "andnot", 79>;
859
860// Bitwise select: v128.bitselect
// Operand order is (v1, v2, c). The instruction has no pattern of its own;
// every pattern below selects it explicitly.
861defm BITSELECT :
862  SIMD_I<(outs V128:$dst), (ins V128:$v1, V128:$v2, V128:$c), (outs), (ins), [],
863         "v128.bitselect\t$dst, $v1, $v2, $c", "v128.bitselect", 82>;
864
// Direct mapping of the wasm bitselect intrinsic, for every shape in AllVecs.
865foreach vec = AllVecs in
866def : Pat<(vec.vt (int_wasm_bitselect
867            (vec.vt V128:$v1), (vec.vt V128:$v2), (vec.vt V128:$c))),
868          (BITSELECT $v1, $v2, $c)>;
869
870// Bitselect is equivalent to (c & v1) | (~c & v2)
871foreach vec = IntVecs in
872def : Pat<(vec.vt (or (and (vec.vt V128:$c), (vec.vt V128:$v1)),
873            (and (vnot V128:$c), (vec.vt V128:$v2)))),
874          (BITSELECT $v1, $v2, $c)>;
875
876// Bitselect is also equivalent to ((v1 ^ v2) & c) ^ v2
877foreach vec = IntVecs in
878def : Pat<(vec.vt (xor (and (xor (vec.vt V128:$v1), (vec.vt V128:$v2)),
879                            (vec.vt V128:$c)),
880                       (vec.vt V128:$v2))),
881          (BITSELECT $v1, $v2, $c)>;
882
883// Same pattern with `c` negated, so `v1` and `v2` are swapped in the output:
// ((v1 ^ v2) & ~c) ^ v2 selects v2 where c is set and v1 elsewhere.
884foreach vec = IntVecs in
885def : Pat<(vec.vt (xor (and (xor (vec.vt V128:$v1), (vec.vt V128:$v2)),
886                            (vnot (vec.vt V128:$c))),
887                       (vec.vt V128:$v2))),
888          (BITSELECT $v2, $v1, $c)>;
889
890// Also implement vselect in terms of bitselect
// The condition operand has type vec.int_vt while the arms have type vec.vt.
891foreach vec = AllVecs in
892def : Pat<(vec.vt (vselect
893            (vec.int_vt V128:$c), (vec.vt V128:$v1), (vec.vt V128:$v2))),
894          (BITSELECT $v1, $v2, $c)>;
895
896// MVP select on v128 values
// Defined through `I` directly rather than SIMD_I, with opcode 0x1b, so it
// does not go through the SIMD opcode-prefix encoding of ABSTRACT_SIMD_I.
// Operands are (lhs, rhs, cond) with an I32 condition.
897defm SELECT_V128 :
898  I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs, I32:$cond), (outs), (ins), [],
899    "v128.select\t$dst, $lhs, $rhs, $cond", "v128.select", 0x1b>;
900
901foreach vec = AllVecs in {
902def : Pat<(select I32:$cond, (vec.vt V128:$lhs), (vec.vt V128:$rhs)),
903          (SELECT_V128 $lhs, $rhs, $cond)>;
904
905// ISD::SELECT requires its operand to conform to getBooleanContents, but
906// WebAssembly's select interprets any non-zero value as true, so we can fold
907// a setne with 0 into a select.
908def : Pat<(select
909            (i32 (setne I32:$cond, 0)), (vec.vt V128:$lhs), (vec.vt V128:$rhs)),
910          (SELECT_V128 $lhs, $rhs, $cond)>;
911
912// And again, this time with seteq instead of setne and the arms reversed.
913def : Pat<(select
914            (i32 (seteq I32:$cond, 0)), (vec.vt V128:$lhs), (vec.vt V128:$rhs)),
915          (SELECT_V128 $rhs, $lhs, $cond)>;
916} // foreach vec
917
918//===----------------------------------------------------------------------===//
919// Integer unary arithmetic
920//===----------------------------------------------------------------------===//
921
// Instantiates an integer unary instruction for all four integer shapes.
// Opcodes are baseInst for I8x16 and baseInst + 32 / + 64 / + 96 for
// I16x8 / I32x4 / I64x2.
922multiclass SIMDUnaryInt<SDPatternOperator node, string name, bits<32> baseInst> {
923  defm "" : SIMDUnary<I8x16, node, name, baseInst>;
924  defm "" : SIMDUnary<I16x8, node, name, !add(baseInst, 32)>;
925  defm "" : SIMDUnary<I32x4, node, name, !add(baseInst, 64)>;
926  defm "" : SIMDUnary<I64x2, node, name, !add(baseInst, 96)>;
927}
928
929// Integer vector negation
// Matches a subtraction from the all-zeros vector, i.e. (0 - $in).
930def ivneg : PatFrag<(ops node:$in), (sub immAllZerosV, $in)>;
931
932// Integer absolute value: abs
933defm ABS : SIMDUnaryInt<abs, "abs", 96>;
934
935// Integer negation: neg
936defm NEG : SIMDUnaryInt<ivneg, "neg", 97>;
937
938// Population count: popcnt
// Only the I8x16 shape is defined.
939defm POPCNT : SIMDUnary<I8x16, ctpop, "popcnt", 0x62>;
940
941// Any lane true: any_true
// A single shape-agnostic v128.any_true instruction producing an I32. It has
// no pattern of its own; the intrinsic is mapped onto it once per shape in
// IntVecs below.
942defm ANYTRUE : SIMD_I<(outs I32:$dst), (ins V128:$vec), (outs), (ins), [],
943                      "v128.any_true\t$dst, $vec", "v128.any_true", 0x53>;
944
945foreach vec = IntVecs in
946def : Pat<(int_wasm_anytrue (vec.vt V128:$vec)), (ANYTRUE V128:$vec)>;
947
948// All lanes true: all_true
// Unlike any_true, there is one instruction per shape, named ALLTRUE_<vec>,
// each carrying its own pattern for the alltrue intrinsic.
949multiclass SIMDAllTrue<Vec vec, bits<32> simdop> {
950  defm ALLTRUE_#vec : SIMD_I<(outs I32:$dst), (ins V128:$vec), (outs), (ins),
951                             [(set I32:$dst,
952                               (i32 (int_wasm_alltrue (vec.vt V128:$vec))))],
953                             vec.prefix#".all_true\t$dst, $vec",
954                             vec.prefix#".all_true", simdop>;
955}
956
957defm "" : SIMDAllTrue<I8x16, 0x63>;
958defm "" : SIMDAllTrue<I16x8, 0x83>;
959defm "" : SIMDAllTrue<I32x4, 0xa3>;
960defm "" : SIMDAllTrue<I64x2, 0xc3>;
961
962// Reductions already return 0 or 1, so and 1, setne 0, and seteq 1
963// can be folded out
// Each entry is [intrinsic name, instruction name, vector shape]; the strings
// are resolved to records with !cast. ANYTRUE is a single instruction shared
// by all four shapes, while ALLTRUE has one instruction per shape.
964foreach reduction =
965  [["int_wasm_anytrue", "ANYTRUE", "I8x16"],
966   ["int_wasm_anytrue", "ANYTRUE", "I16x8"],
967   ["int_wasm_anytrue", "ANYTRUE", "I32x4"],
968   ["int_wasm_anytrue", "ANYTRUE", "I64x2"],
969   ["int_wasm_alltrue", "ALLTRUE_I8x16", "I8x16"],
970   ["int_wasm_alltrue", "ALLTRUE_I16x8", "I16x8"],
971   ["int_wasm_alltrue", "ALLTRUE_I32x4", "I32x4"],
972   ["int_wasm_alltrue", "ALLTRUE_I64x2", "I64x2"]] in {
973defvar intrinsic = !cast<Intrinsic>(reduction[0]);
974defvar inst = !cast<NI>(reduction[1]);
975defvar vec = !cast<Vec>(reduction[2]);
// (x & 1), (x != 0) and (x == 1) all reduce to x when x is 0 or 1.
976def : Pat<(i32 (and (i32 (intrinsic (vec.vt V128:$x))), (i32 1))), (inst $x)>;
977def : Pat<(i32 (setne (i32 (intrinsic (vec.vt V128:$x))), (i32 0))), (inst $x)>;
978def : Pat<(i32 (seteq (i32 (intrinsic (vec.vt V128:$x))), (i32 1))), (inst $x)>;
979}
980
// Defines the per-shape "<prefix>.bitmask" instruction, which takes a V128
// and produces an I32, selected from the wasm bitmask intrinsic on vec.vt.
// The record name is the defm prefix followed by "_"#vec.
981multiclass SIMDBitmask<Vec vec, bits<32> simdop> {
982  defm _#vec : SIMD_I<(outs I32:$dst), (ins V128:$vec), (outs), (ins),
983                      [(set I32:$dst,
984                         (i32 (int_wasm_bitmask (vec.vt V128:$vec))))],
985                      vec.prefix#".bitmask\t$dst, $vec", vec.prefix#".bitmask",
986                      simdop>;
987}
988
// Opcodes are spaced 32 apart across the four integer shapes.
989defm BITMASK : SIMDBitmask<I8x16, 100>;
990defm BITMASK : SIMDBitmask<I16x8, 132>;
991defm BITMASK : SIMDBitmask<I32x4, 164>;
992defm BITMASK : SIMDBitmask<I64x2, 196>;
993
994//===----------------------------------------------------------------------===//
995// Bit shifts
996//===----------------------------------------------------------------------===//
997
// Defines a shift instruction for vector shape `vec`. The first operand is
// the V128 value to shift and the second is a scalar I32 shift amount; the
// result has type vec.vt. The record name is the defm prefix followed by
// "_"#vec and the mnemonic is vec.prefix#"."#name.
998multiclass SIMDShift<Vec vec, SDNode node, string name, bits<32> simdop> {
999  defm _#vec : SIMD_I<(outs V128:$dst), (ins V128:$vec, I32:$x), (outs), (ins),
1000                      [(set (vec.vt V128:$dst), (node V128:$vec, I32:$x))],
1001                      vec.prefix#"."#name#"\t$dst, $vec, $x",
1002                      vec.prefix#"."#name, simdop>;
1003}
1004
// Instantiates a shift for all four integer shapes. Opcodes are baseInst for
// I8x16 and baseInst + 32 / + 64 / + 96 for I16x8 / I32x4 / I64x2.
1005multiclass SIMDShiftInt<SDNode node, string name, bits<32> baseInst> {
1006  defm "" : SIMDShift<I8x16, node, name, baseInst>;
1007  defm "" : SIMDShift<I16x8, node, name, !add(baseInst, 32)>;
1008  defm "" : SIMDShift<I32x4, node, name, !add(baseInst, 64)>;
1009  defm "" : SIMDShift<I64x2, node, name, !add(baseInst, 96)>;
1010}
1011
1012// WebAssembly SIMD shifts are nonstandard in that the shift amount is
1013// an i32 rather than a vector, so they need custom nodes.
// Type profile: one result and two operands; the result is a vector, operand 1
// has the same type as the result, and operand 2 is an i32.
1014def wasm_shift_t :
1015  SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisVT<2, i32>]>;
1016def wasm_shl : SDNode<"WebAssemblyISD::VEC_SHL", wasm_shift_t>;
1017def wasm_shr_s : SDNode<"WebAssemblyISD::VEC_SHR_S", wasm_shift_t>;
1018def wasm_shr_u : SDNode<"WebAssemblyISD::VEC_SHR_U", wasm_shift_t>;
1019
1020// Left shift by scalar: shl
1021defm SHL : SIMDShiftInt<wasm_shl, "shl", 107>;
1022
1023// Right shift by scalar: shr_s / shr_u
1024defm SHR_S : SIMDShiftInt<wasm_shr_s, "shr_s", 108>;
1025defm SHR_U : SIMDShiftInt<wasm_shr_u, "shr_u", 109>;
1026
1027// Optimize away an explicit mask on a shift count.
// For each shape the folded mask is (lane width in bits - 1): 7, 15, 31, 63.
// NOTE(review): this relies on the instructions themselves reducing the shift
// count modulo the lane width -- confirm against the WebAssembly SIMD spec.
1028def : Pat<(wasm_shl (v16i8 V128:$lhs), (and I32:$rhs, 7)),
1029          (SHL_I8x16 V128:$lhs, I32:$rhs)>;
1030def : Pat<(wasm_shr_s (v16i8 V128:$lhs), (and I32:$rhs, 7)),
1031          (SHR_S_I8x16 V128:$lhs, I32:$rhs)>;
1032def : Pat<(wasm_shr_u (v16i8 V128:$lhs), (and I32:$rhs, 7)),
1033          (SHR_U_I8x16 V128:$lhs, I32:$rhs)>;
1034
1035def : Pat<(wasm_shl (v8i16 V128:$lhs), (and I32:$rhs, 15)),
1036          (SHL_I16x8 V128:$lhs, I32:$rhs)>;
1037def : Pat<(wasm_shr_s (v8i16 V128:$lhs), (and I32:$rhs, 15)),
1038          (SHR_S_I16x8 V128:$lhs, I32:$rhs)>;
1039def : Pat<(wasm_shr_u (v8i16 V128:$lhs), (and I32:$rhs, 15)),
1040          (SHR_U_I16x8 V128:$lhs, I32:$rhs)>;
1041
1042def : Pat<(wasm_shl (v4i32 V128:$lhs), (and I32:$rhs, 31)),
1043          (SHL_I32x4 V128:$lhs, I32:$rhs)>;
1044def : Pat<(wasm_shr_s (v4i32 V128:$lhs), (and I32:$rhs, 31)),
1045          (SHR_S_I32x4 V128:$lhs, I32:$rhs)>;
1046def : Pat<(wasm_shr_u (v4i32 V128:$lhs), (and I32:$rhs, 31)),
1047          (SHR_U_I32x4 V128:$lhs, I32:$rhs)>;
1048
1049def : Pat<(wasm_shl (v2i64 V128:$lhs), (and I32:$rhs, 63)),
1050          (SHL_I64x2 V128:$lhs, I32:$rhs)>;
1051def : Pat<(wasm_shr_s (v2i64 V128:$lhs), (and I32:$rhs, 63)),
1052          (SHR_S_I64x2 V128:$lhs, I32:$rhs)>;
1053def : Pat<(wasm_shr_u (v2i64 V128:$lhs), (and I32:$rhs, 63)),
1054          (SHR_U_I64x2 V128:$lhs, I32:$rhs)>;
// For I64x2 the count may also arrive as a masked i64 that is then truncated;
// in that case the mask is dropped and the i64 is wrapped to i32 explicitly.
1055def : Pat<(wasm_shl (v2i64 V128:$lhs), (trunc (and I64:$rhs, 63))),
1056          (SHL_I64x2 V128:$lhs, (I32_WRAP_I64 I64:$rhs))>;
1057def : Pat<(wasm_shr_s (v2i64 V128:$lhs), (trunc (and I64:$rhs, 63))),
1058          (SHR_S_I64x2 V128:$lhs, (I32_WRAP_I64 I64:$rhs))>;
1059def : Pat<(wasm_shr_u (v2i64 V128:$lhs), (trunc (and I64:$rhs, 63))),
1060          (SHR_U_I64x2 V128:$lhs, (I32_WRAP_I64 I64:$rhs))>;
1061
1062//===----------------------------------------------------------------------===//
1063// Integer binary arithmetic
1064//===----------------------------------------------------------------------===//
1065
// Helper multiclasses that instantiate SIMDBinary for different subsets of
// the integer shapes. In all of them baseInst is the I8x16 opcode and the
// other shapes use baseInst + 32 (I16x8), + 64 (I32x4) and + 96 (I64x2),
// even when the I8x16 variant itself is not emitted.

// I16x8, I32x4 and I64x2 (no I8x16).
1066multiclass SIMDBinaryIntNoI8x16<SDPatternOperator node, string name, bits<32> baseInst> {
1067  defm "" : SIMDBinary<I16x8, node, name, !add(baseInst, 32)>;
1068  defm "" : SIMDBinary<I32x4, node, name, !add(baseInst, 64)>;
1069  defm "" : SIMDBinary<I64x2, node, name, !add(baseInst, 96)>;
1070}
1071
// I8x16 and I16x8 only.
1072multiclass SIMDBinaryIntSmall<SDPatternOperator node, string name, bits<32> baseInst> {
1073  defm "" : SIMDBinary<I8x16, node, name, baseInst>;
1074  defm "" : SIMDBinary<I16x8, node, name, !add(baseInst, 32)>;
1075}
1076
// I8x16, I16x8 and I32x4 (no I64x2).
1077multiclass SIMDBinaryIntNoI64x2<SDPatternOperator node, string name, bits<32> baseInst> {
1078  defm "" : SIMDBinaryIntSmall<node, name, baseInst>;
1079  defm "" : SIMDBinary<I32x4, node, name, !add(baseInst, 64)>;
1080}
1081
// All four integer shapes.
1082multiclass SIMDBinaryInt<SDPatternOperator node, string name, bits<32> baseInst> {
1083  defm "" : SIMDBinaryIntNoI64x2<node, name, baseInst>;
1084  defm "" : SIMDBinary<I64x2, node, name, !add(baseInst, 96)>;
1085}
1086
1087// Integer addition: add / add_sat_s / add_sat_u
// The saturating forms exist only for I8x16 and I16x8.
1088let isCommutable = 1 in {
1089defm ADD : SIMDBinaryInt<add, "add", 110>;
1090defm ADD_SAT_S : SIMDBinaryIntSmall<saddsat, "add_sat_s", 111>;
1091defm ADD_SAT_U : SIMDBinaryIntSmall<uaddsat, "add_sat_u", 112>;
1092} // isCommutable = 1
1093
1094// Integer subtraction: sub / sub_sat_s / sub_sat_u
// The saturating subtractions are selected from wasm intrinsics rather than
// from generic saturating-subtract nodes.
1095defm SUB : SIMDBinaryInt<sub, "sub", 113>;
1096defm SUB_SAT_S :
1097  SIMDBinaryIntSmall<int_wasm_sub_sat_signed, "sub_sat_s", 114>;
1098defm SUB_SAT_U :
1099  SIMDBinaryIntSmall<int_wasm_sub_sat_unsigned, "sub_sat_u", 115>;
1100
1101// Integer multiplication: mul
// No I8x16 variant is defined.
1102let isCommutable = 1 in
1103defm MUL : SIMDBinaryIntNoI8x16<mul, "mul", 117>;
1104
1105// Integer min_s / min_u / max_s / max_u
// No I64x2 variants are defined.
1106let isCommutable = 1 in {
1107defm MIN_S : SIMDBinaryIntNoI64x2<smin, "min_s", 118>;
1108defm MIN_U : SIMDBinaryIntNoI64x2<umin, "min_u", 119>;
1109defm MAX_S : SIMDBinaryIntNoI64x2<smax, "max_s", 120>;
1110defm MAX_U : SIMDBinaryIntNoI64x2<umax, "max_u", 121>;
1111} // isCommutable = 1
1112
1113// Integer unsigned rounding average: avgr_u
// Defined for I8x16 and I16x8 only, with explicit opcodes.
1114let isCommutable = 1 in {
1115defm AVGR_U : SIMDBinary<I8x16, int_wasm_avgr_unsigned, "avgr_u", 123>;
1116defm AVGR_U : SIMDBinary<I16x8, int_wasm_avgr_unsigned, "avgr_u", 155>;
1117}
1118
// Matches an `add` node only when it carries the no-unsigned-wrap flag.
1119def add_nuw : PatFrag<(ops node:$lhs, node:$rhs), (add $lhs, $rhs),
1120                      "return N->getFlags().hasNoUnsignedWrap();">;
1121
// Recognize the open-coded rounding average ((lhs + rhs + 1) >> 1, unsigned)
// and select avgr_u for it. Both additions must be no-unsigned-wrap for the
// pattern to match.
1122foreach vec = [I8x16, I16x8] in {
1123defvar inst = !cast<NI>("AVGR_U_"#vec);
1124def : Pat<(wasm_shr_u
1125            (add_nuw
1126              (add_nuw (vec.vt V128:$lhs), (vec.vt V128:$rhs)),
1127              (vec.splat (i32 1))),
1128            (i32 1)),
1129          (inst $lhs, $rhs)>;
1130}
1131
1132// Widening dot product: i32x4.dot_i16x8_s
// Selected from the wasm dot intrinsic. The pattern does not spell out vector
// types on its operands or result.
1133let isCommutable = 1 in
1134defm DOT : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), (outs), (ins),
1135                  [(set V128:$dst, (int_wasm_dot V128:$lhs, V128:$rhs))],
1136                  "i32x4.dot_i16x8_s\t$dst, $lhs, $rhs", "i32x4.dot_i16x8_s",
1137                  186>;
1138
1139// Extending multiplication: extmul_{low,high}_<source shape>_{s,u}
// The extend nodes take one vector operand and produce one vector result.
1140def extend_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;
1141def extend_low_s : SDNode<"WebAssemblyISD::EXTEND_LOW_S", extend_t>;
1142def extend_high_s : SDNode<"WebAssemblyISD::EXTEND_HIGH_S", extend_t>;
1143def extend_low_u : SDNode<"WebAssemblyISD::EXTEND_LOW_U", extend_t>;
1144def extend_high_u : SDNode<"WebAssemblyISD::EXTEND_HIGH_U", extend_t>;
1145
// Binary instruction whose result has type vec.vt but whose two operands have
// type vec.split.vt. The record name is the defm prefix followed by "_"#vec,
// i.e. it is named after the result shape.
1146multiclass SIMDExtBinary<Vec vec, SDPatternOperator node, string name,
1147                         bits<32> simdop> {
1148  defm _#vec : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs),
1149                      (outs), (ins),
1150                      [(set (vec.vt V128:$dst), (node
1151                         (vec.split.vt V128:$lhs),(vec.split.vt V128:$rhs)))],
1152                      vec.prefix#"."#name#"\t$dst, $lhs, $rhs",
1153                      vec.prefix#"."#name, simdop>;
1154}
1155
// Matches a `mul` whose two operands are both produced by the same `extend`
// node.
1156class ExtMulPat<SDNode extend> :
1157  PatFrag<(ops node:$lhs, node:$rhs),
1158          (mul (extend $lhs), (extend $rhs))> {}
1159
1160def extmul_low_s : ExtMulPat<extend_low_s>;
1161def extmul_high_s : ExtMulPat<extend_high_s>;
1162def extmul_low_u : ExtMulPat<extend_low_u>;
1163def extmul_high_u : ExtMulPat<extend_high_u>;
1164
// I16x8 results from i8x16 sources.
1165defm EXTMUL_LOW_S :
1166  SIMDExtBinary<I16x8, extmul_low_s, "extmul_low_i8x16_s", 0x9c>;
1167defm EXTMUL_HIGH_S :
1168  SIMDExtBinary<I16x8, extmul_high_s, "extmul_high_i8x16_s", 0x9d>;
1169defm EXTMUL_LOW_U :
1170  SIMDExtBinary<I16x8, extmul_low_u, "extmul_low_i8x16_u", 0x9e>;
1171defm EXTMUL_HIGH_U :
1172  SIMDExtBinary<I16x8, extmul_high_u, "extmul_high_i8x16_u", 0x9f>;
1173
// I32x4 results from i16x8 sources.
1174defm EXTMUL_LOW_S :
1175  SIMDExtBinary<I32x4, extmul_low_s, "extmul_low_i16x8_s", 0xbc>;
1176defm EXTMUL_HIGH_S :
1177  SIMDExtBinary<I32x4, extmul_high_s, "extmul_high_i16x8_s", 0xbd>;
1178defm EXTMUL_LOW_U :
1179  SIMDExtBinary<I32x4, extmul_low_u, "extmul_low_i16x8_u", 0xbe>;
1180defm EXTMUL_HIGH_U :
1181  SIMDExtBinary<I32x4, extmul_high_u, "extmul_high_i16x8_u", 0xbf>;
1182
// I64x2 results from i32x4 sources.
1183defm EXTMUL_LOW_S :
1184  SIMDExtBinary<I64x2, extmul_low_s, "extmul_low_i32x4_s", 0xdc>;
1185defm EXTMUL_HIGH_S :
1186  SIMDExtBinary<I64x2, extmul_high_s, "extmul_high_i32x4_s", 0xdd>;
1187defm EXTMUL_LOW_U :
1188  SIMDExtBinary<I64x2, extmul_low_u, "extmul_low_i32x4_u", 0xde>;
1189defm EXTMUL_HIGH_U :
1190  SIMDExtBinary<I64x2, extmul_high_u, "extmul_high_i32x4_u", 0xdf>;
1191
1192//===----------------------------------------------------------------------===//
1193// Floating-point unary arithmetic
1194//===----------------------------------------------------------------------===//
1195
// Instantiates a floating-point unary instruction for F32x4 (baseInst),
// F64x2 (baseInst + 12) and F16x8. The F16x8 variant is gated on
// HasHalfPrecision and its offset depends on the mnemonic: + 80 for "sqrt",
// + 81 for everything else. With the bases used in this section that gives
// abs = 305, neg = 306, sqrt = 307, i.e. three consecutive opcodes.
1196multiclass SIMDUnaryFP<SDNode node, string name, bits<32> baseInst> {
1197  defm "" : SIMDUnary<F32x4, node, name, baseInst>;
1198  defm "" : SIMDUnary<F64x2, node, name, !add(baseInst, 12)>;
1199  // Unlike F32x4 and F64x2 there's not a gap in the opcodes between "neg" and
1200  // "sqrt" so subtract one from the offset.
1201  defm "" : HalfPrecisionUnary<F16x8, node, name,
1202                               !add(baseInst,!if(!eq(name, "sqrt"), 80, 81))>;
1203}
1204
1205// Absolute value: abs
1206defm ABS : SIMDUnaryFP<fabs, "abs", 224>;
1207
1208// Negation: neg
1209defm NEG : SIMDUnaryFP<fneg, "neg", 225>;
1210
1211// Square root: sqrt
1212defm SQRT : SIMDUnaryFP<fsqrt, "sqrt", 227>;
1213
1214// Rounding: ceil, floor, trunc, nearest
// These do not follow a regular opcode stride, so every shape is listed with
// an explicit opcode. `nearest` is selected from fnearbyint. The F16x8 forms
// require HasHalfPrecision.
1215defm CEIL : SIMDUnary<F32x4, fceil, "ceil", 0x67>;
1216defm FLOOR : SIMDUnary<F32x4, ffloor, "floor", 0x68>;
1217defm TRUNC: SIMDUnary<F32x4, ftrunc, "trunc", 0x69>;
1218defm NEAREST: SIMDUnary<F32x4, fnearbyint, "nearest", 0x6a>;
1219defm CEIL : SIMDUnary<F64x2, fceil, "ceil", 0x74>;
1220defm FLOOR : SIMDUnary<F64x2, ffloor, "floor", 0x75>;
1221defm TRUNC: SIMDUnary<F64x2, ftrunc, "trunc", 0x7a>;
1222defm NEAREST: SIMDUnary<F64x2, fnearbyint, "nearest", 0x94>;
1223defm CEIL : HalfPrecisionUnary<F16x8, fceil, "ceil", 0x13c>;
1224defm FLOOR : HalfPrecisionUnary<F16x8, ffloor, "floor", 0x13d>;
1225defm TRUNC : HalfPrecisionUnary<F16x8, ftrunc, "trunc", 0x13e>;
1226defm NEAREST : HalfPrecisionUnary<F16x8, fnearbyint, "nearest", 0x13f>;
1227
1228// WebAssembly doesn't expose inexact exceptions, so map frint to fnearbyint.
// (That is, frint is selected to the same NEAREST instructions.)
1229def : Pat<(v4f32 (frint (v4f32 V128:$src))), (NEAREST_F32x4 V128:$src)>;
1230def : Pat<(v2f64 (frint (v2f64 V128:$src))), (NEAREST_F64x2 V128:$src)>;
1231def : Pat<(v8f16 (frint (v8f16 V128:$src))), (NEAREST_F16x8 V128:$src)>;
1232
1233// WebAssembly always rounds ties-to-even, so map froundeven to fnearbyint.
1234def : Pat<(v4f32 (froundeven (v4f32 V128:$src))), (NEAREST_F32x4 V128:$src)>;
1235def : Pat<(v2f64 (froundeven (v2f64 V128:$src))), (NEAREST_F64x2 V128:$src)>;
1236def : Pat<(v8f16 (froundeven (v8f16 V128:$src))), (NEAREST_F16x8 V128:$src)>;
1237
1238//===----------------------------------------------------------------------===//
1239// Floating-point binary arithmetic
1240//===----------------------------------------------------------------------===//
1241
// Instantiates a floating-point binary instruction for F32x4 (baseInst),
// F64x2 (baseInst + 12) and F16x8 (baseInst + 80). The F16x8 variant goes
// through HalfPrecisionBinary and therefore also requires HasHalfPrecision.
1242multiclass SIMDBinaryFP<SDPatternOperator node, string name, bits<32> baseInst> {
1243  defm "" : SIMDBinary<F32x4, node, name, baseInst>;
1244  defm "" : SIMDBinary<F64x2, node, name, !add(baseInst, 12)>;
1245  defm "" : HalfPrecisionBinary<F16x8, node, name, !add(baseInst, 80)>;
1246}
1247
// Floating-point arithmetic instantiations. Only add and mul are marked
// commutable; min and max are not.
1248// Addition: add
1249let isCommutable = 1 in
1250defm ADD : SIMDBinaryFP<fadd, "add", 228>;
1251
1252// Subtraction: sub
1253defm SUB : SIMDBinaryFP<fsub, "sub", 229>;
1254
1255// Multiplication: mul
1256let isCommutable = 1 in
1257defm MUL : SIMDBinaryFP<fmul, "mul", 230>;
1258
1259// Division: div
1260defm DIV : SIMDBinaryFP<fdiv, "div", 231>;
1261
1262// NaN-propagating minimum: min
1263defm MIN : SIMDBinaryFP<fminimum, "min", 232>;
1264
1265// NaN-propagating maximum: max
1266defm MAX : SIMDBinaryFP<fmaximum, "max", 233>;
1267
1268// Pseudo-minimum: pmin
// Every alternative selects $rhs when $rhs compares below $lhs (strictly or
// not, written from either side) and $lhs otherwise. Only ordered condition
// codes are matched.
1269def pmin : PatFrags<(ops node:$lhs, node:$rhs), [
1270                    (vselect (setolt $rhs, $lhs), $rhs, $lhs),
1271                    (vselect (setole $rhs, $lhs), $rhs, $lhs),
1272                    (vselect (setogt $lhs, $rhs), $rhs, $lhs),
1273                    (vselect (setoge $lhs, $rhs), $rhs, $lhs)
1274]>;
1275defm PMIN : SIMDBinaryFP<pmin, "pmin", 234>;
1276
1277// Pseudo-maximum: pmax
// Mirror image of pmin: every alternative selects $rhs when $rhs compares
// above $lhs and $lhs otherwise.
1278def pmax : PatFrags<(ops node:$lhs, node:$rhs), [
1279                    (vselect (setogt $rhs, $lhs), $rhs, $lhs),
1280                    (vselect (setoge $rhs, $lhs), $rhs, $lhs),
1281                    (vselect (setolt $lhs, $rhs), $rhs, $lhs),
1282                    (vselect (setole $lhs, $rhs), $rhs, $lhs)
1283]>;
1284defm PMAX : SIMDBinaryFP<pmax, "pmax", 235>;
1285
1286// Also match the pmin/pmax cases where the operands are int vectors (but the
1287// comparison is still a floating point comparison). This can happen when using
1288// the wasm_simd128.h intrinsics because v128_t is an integer vector.
// Inside the loop, `pmin` and `pmax` are local defvars naming the
// PMIN_<vec> / PMAX_<vec> instructions; they shadow the PatFrags of the same
// name. Only the setolt form is matched for these integer-typed selects.
1289foreach vec = [F32x4, F64x2, F16x8] in {
1290defvar pmin = !cast<NI>("PMIN_"#vec);
1291defvar pmax = !cast<NI>("PMAX_"#vec);
1292def : Pat<(vec.int_vt (vselect
1293            (setolt (vec.vt (bitconvert V128:$rhs)),
1294                    (vec.vt (bitconvert V128:$lhs))),
1295            V128:$rhs, V128:$lhs)),
1296          (pmin $lhs, $rhs)>;
1297def : Pat<(vec.int_vt (vselect
1298            (setolt (vec.vt (bitconvert V128:$lhs)),
1299                    (vec.vt (bitconvert V128:$rhs))),
1300            V128:$rhs, V128:$lhs)),
1301          (pmax $lhs, $rhs)>;
1302}
1303
1304// And match the pmin/pmax LLVM intrinsics as well
1305def : Pat<(v4f32 (int_wasm_pmin (v4f32 V128:$lhs), (v4f32 V128:$rhs))),
1306          (PMIN_F32x4 V128:$lhs, V128:$rhs)>;
1307def : Pat<(v4f32 (int_wasm_pmax (v4f32 V128:$lhs), (v4f32 V128:$rhs))),
1308          (PMAX_F32x4 V128:$lhs, V128:$rhs)>;
1309def : Pat<(v2f64 (int_wasm_pmin (v2f64 V128:$lhs), (v2f64 V128:$rhs))),
1310          (PMIN_F64x2 V128:$lhs, V128:$rhs)>;
1311def : Pat<(v2f64 (int_wasm_pmax (v2f64 V128:$lhs), (v2f64 V128:$rhs))),
1312          (PMAX_F64x2 V128:$lhs, V128:$rhs)>;
1313def : Pat<(v8f16 (int_wasm_pmin (v8f16 V128:$lhs), (v8f16 V128:$rhs))),
1314          (PMIN_F16x8 V128:$lhs, V128:$rhs)>;
1315def : Pat<(v8f16 (int_wasm_pmax (v8f16 V128:$lhs), (v8f16 V128:$rhs))),
1316          (PMAX_F16x8 V128:$lhs, V128:$rhs)>;
1317
1318//===----------------------------------------------------------------------===//
1319// Conversions
1320//===----------------------------------------------------------------------===//
1321
// Defines a one-operand conversion instruction.
//   vec  - result shape; also supplies the mnemonic prefix.
//   arg  - operand shape.
//   op   - operator matched by the selection pattern.
//   reqs - extra predicates handed to SIMD_I.
// The record is named after the operator and the result shape, op#"_"#vec
// (for example fp_to_sint_I32x4), rather than after a defm prefix, which is
// why the instantiations use `defm ""`.
1322multiclass SIMDConvert<Vec vec, Vec arg, SDPatternOperator op, string name,
1323                       bits<32> simdop, list<Predicate> reqs = []> {
1324  defm op#_#vec :
1325    SIMD_I<(outs V128:$dst), (ins V128:$vec), (outs), (ins),
1326           [(set (vec.vt V128:$dst), (vec.vt (op (arg.vt V128:$vec))))],
1327           vec.prefix#"."#name#"\t$dst, $vec", vec.prefix#"."#name, simdop, reqs>;
1328}
1329
// Same as SIMDConvert, but additionally gated on the HasHalfPrecision
// predicate.
1330multiclass HalfPrecisionConvert<Vec vec, Vec arg, SDPatternOperator op,
1331                                string name, bits<32> simdop> {
1332  defm "" : SIMDConvert<vec, arg, op, name, simdop, [HasHalfPrecision]>;
1333}
1334
1335// Floating point to integer with saturation: trunc_sat
// The plain fp_to_sint / fp_to_uint nodes are selected to the saturating
// instructions.
1336defm "" : SIMDConvert<I32x4, F32x4, fp_to_sint, "trunc_sat_f32x4_s", 248>;
1337defm "" : SIMDConvert<I32x4, F32x4, fp_to_uint, "trunc_sat_f32x4_u", 249>;
1338defm "" : HalfPrecisionConvert<I16x8, F16x8, fp_to_sint, "trunc_sat_f16x8_s", 0x148>;
1339defm "" : HalfPrecisionConvert<I16x8, F16x8, fp_to_uint, "trunc_sat_f16x8_u", 0x149>;
1340
1341// Support the saturating variety as well.
// These fragments match fp_to_{s,u}int_sat with an i32 saturation type and
// map to the same v4f32 -> v4i32 instructions. No F16x8 equivalent is
// defined here.
1342def trunc_s_sat32 : PatFrag<(ops node:$x), (fp_to_sint_sat $x, i32)>;
1343def trunc_u_sat32 : PatFrag<(ops node:$x), (fp_to_uint_sat $x, i32)>;
1344def : Pat<(v4i32 (trunc_s_sat32 (v4f32 V128:$src))), (fp_to_sint_I32x4 $src)>;
1345def : Pat<(v4i32 (trunc_u_sat32 (v4f32 V128:$src))), (fp_to_uint_I32x4 $src)>;
1346
// f64x2 -> i32x4 truncation uses target-specific nodes (one vector operand,
// one vector result).
1347def trunc_sat_zero_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;
1348def trunc_sat_zero_s :
1349  SDNode<"WebAssemblyISD::TRUNC_SAT_ZERO_S", trunc_sat_zero_t>;
1350def trunc_sat_zero_u :
1351  SDNode<"WebAssemblyISD::TRUNC_SAT_ZERO_U", trunc_sat_zero_t>;
1352defm "" : SIMDConvert<I32x4, F64x2, trunc_sat_zero_s, "trunc_sat_f64x2_s_zero",
1353                      0xfc>;
1354defm "" : SIMDConvert<I32x4, F64x2, trunc_sat_zero_u, "trunc_sat_f64x2_u_zero",
1355                      0xfd>;
1356
1357// Integer to floating point: convert
// i32x4 -> f64x2 uses the target-specific convert_low nodes; the same-width
// conversions use the generic sint_to_fp / uint_to_fp nodes. The F16x8 forms
// require HasHalfPrecision.
1358def convert_low_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;
1359def convert_low_s : SDNode<"WebAssemblyISD::CONVERT_LOW_S", convert_low_t>;
1360def convert_low_u : SDNode<"WebAssemblyISD::CONVERT_LOW_U", convert_low_t>;
1361defm "" : SIMDConvert<F32x4, I32x4, sint_to_fp, "convert_i32x4_s", 250>;
1362defm "" : SIMDConvert<F32x4, I32x4, uint_to_fp, "convert_i32x4_u", 251>;
1363defm "" : SIMDConvert<F64x2, I32x4, convert_low_s, "convert_low_i32x4_s", 0xfe>;
1364defm "" : SIMDConvert<F64x2, I32x4, convert_low_u, "convert_low_i32x4_u", 0xff>;
1365defm "" : HalfPrecisionConvert<F16x8, I16x8, sint_to_fp, "convert_i16x8_s", 0x14a>;
1366defm "" : HalfPrecisionConvert<F16x8, I16x8, uint_to_fp, "convert_i16x8_u", 0x14b>;
1367
1368// Extending operations
1369// TODO: refactor this to be uniform for i64x2 if the numbering is not changed.
// For result shape `vec`, emits four conversions from vec.split with
// consecutive opcodes in the order low_s, high_s, low_u, high_u starting at
// baseInst. The mnemonic embeds the source shape's prefix.
1370multiclass SIMDExtend<Vec vec, bits<32> baseInst> {
1371  defm "" : SIMDConvert<vec, vec.split, extend_low_s,
1372                        "extend_low_"#vec.split.prefix#"_s", baseInst>;
1373  defm "" : SIMDConvert<vec, vec.split, extend_high_s,
1374                        "extend_high_"#vec.split.prefix#"_s", !add(baseInst, 1)>;
1375  defm "" : SIMDConvert<vec, vec.split, extend_low_u,
1376                        "extend_low_"#vec.split.prefix#"_u", !add(baseInst, 2)>;
1377  defm "" : SIMDConvert<vec, vec.split, extend_high_u,
1378                        "extend_high_"#vec.split.prefix#"_u", !add(baseInst, 3)>;
1379}
1380
1381defm "" : SIMDExtend<I16x8, 0x87>;
1382defm "" : SIMDExtend<I32x4, 0xa7>;
1383defm "" : SIMDExtend<I64x2, 0xc7>;
1384
1385// Narrowing operations
// `vec` is the SOURCE shape; the result shape is vec.split. Two instructions
// are emitted, NARROW_S_<vec.split> at baseInst and NARROW_U_<vec.split> at
// baseInst + 1, selected from the signed / unsigned wasm narrow intrinsics.
// The mnemonic is "<result prefix>.narrow_<source prefix>_{s,u}".
1386multiclass SIMDNarrow<Vec vec, bits<32> baseInst> {
1387  defvar name = vec.split.prefix#".narrow_"#vec.prefix;
1388  defm NARROW_S_#vec.split :
1389    SIMD_I<(outs V128:$dst), (ins V128:$low, V128:$high), (outs), (ins),
1390           [(set (vec.split.vt V128:$dst), (vec.split.vt (int_wasm_narrow_signed
1391             (vec.vt V128:$low), (vec.vt V128:$high))))],
1392           name#"_s\t$dst, $low, $high", name#"_s", baseInst>;
1393  defm NARROW_U_#vec.split :
1394    SIMD_I<(outs V128:$dst), (ins V128:$low, V128:$high), (outs), (ins),
1395           [(set (vec.split.vt V128:$dst), (vec.split.vt (int_wasm_narrow_unsigned
1396             (vec.vt V128:$low), (vec.vt V128:$high))))],
1397           name#"_u\t$dst, $low, $high", name#"_u", !add(baseInst, 1)>;
1398}
1399
1400defm "" : SIMDNarrow<I16x8, 101>;
1401defm "" : SIMDNarrow<I32x4, 133>;
1402
1403// WebAssemblyISD::NARROW_U
// Target-specific node with one result and two operands and no type
// constraints in its profile; the patterns below pin the concrete types and
// select the unsigned narrow instructions.
1404def wasm_narrow_t : SDTypeProfile<1, 2, []>;
1405def wasm_narrow_u : SDNode<"WebAssemblyISD::NARROW_U", wasm_narrow_t>;
1406def : Pat<(v16i8 (wasm_narrow_u (v8i16 V128:$left), (v8i16 V128:$right))),
1407          (NARROW_U_I8x16 $left, $right)>;
1408def : Pat<(v8i16 (wasm_narrow_u (v4i32 V128:$left), (v4i32 V128:$right))),
1409          (NARROW_U_I16x8 $left, $right)>;
1410
1411// Bitcasts are nops
1412// Matching bitcast t1 to t1 causes strange errors, so avoid repeating types
// One pattern per ordered pair of distinct shapes in AllVecs; the output is
// the input register itself, retyped, so no instruction is emitted.
1413foreach t1 = AllVecs in
1414foreach t2 = AllVecs in
1415if !ne(t1, t2) then
1416def : Pat<(t1.vt (bitconvert (t2.vt V128:$v))), (t1.vt V128:$v)>;
1417
1418// Extended pairwise addition
// Selected from the signed / unsigned wasm extadd_pairwise intrinsics. The
// first template argument is the result shape and the second the source
// shape.
1419defm "" : SIMDConvert<I16x8, I8x16, int_wasm_extadd_pairwise_signed,
1420                      "extadd_pairwise_i8x16_s", 0x7c>;
1421defm "" : SIMDConvert<I16x8, I8x16, int_wasm_extadd_pairwise_unsigned,
1422                      "extadd_pairwise_i8x16_u", 0x7d>;
1423defm "" : SIMDConvert<I32x4, I16x8, int_wasm_extadd_pairwise_signed,
1424                      "extadd_pairwise_i16x8_s", 0x7e>;
1425defm "" : SIMDConvert<I32x4, I16x8, int_wasm_extadd_pairwise_unsigned,
1426                      "extadd_pairwise_i16x8_u", 0x7f>;
1427
// f64x2 <-> f32x4 conversions

// WebAssemblyISD::DEMOTE_ZERO: one vector result, one vector operand.
def demote_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;
def demote_zero : SDNode<"WebAssemblyISD::DEMOTE_ZERO", demote_t>;
defm "" : SIMDConvert<F32x4, F64x2, demote_zero, "demote_f64x2_zero", 0x5e>;

// WebAssemblyISD::PROMOTE_LOW: one vector result, one vector operand.
def promote_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;
def promote_low : SDNode<"WebAssemblyISD::PROMOTE_LOW", promote_t>;
defm "" : SIMDConvert<F64x2, F32x4, promote_low, "promote_low_f32x4", 0x5f>;
1437
// Lower extending loads to load64_zero + promote_low.
// The fragment matches an `extload` whose memory type is v2f32.
let MemoryVT = v2f32 in
def extloadv2f32 : PatFrag<(ops node:$ptr), (extload node:$ptr)>;

// Adapted from the body of LoadPatNoOffset
// TODO: other addressing patterns
// NOTE(review): the two leading `0` operands are presumably the alignment and
// offset immediates of the load -- confirm against the LOAD_ZERO definition.

// 32-bit address space.
def : Pat<(v2f64 (extloadv2f32 (i32 I32:$addr))),
          (promote_low_F64x2
            (LOAD_ZERO_I64x2_A32 0, 0, I32:$addr))>,
      Requires<[HasAddr32]>;

// 64-bit address space.
def : Pat<(v2f64 (extloadv2f32 (i64 I64:$addr))),
          (promote_low_F64x2
            (LOAD_ZERO_I64x2_A64 0, 0, I64:$addr))>,
      Requires<[HasAddr64]>;
1450
1451//===----------------------------------------------------------------------===//
1452// Saturating Rounding Q-Format Multiplication
1453//===----------------------------------------------------------------------===//
1454
// Binary instruction over I16x8 selected from int_wasm_q15mulr_sat_signed,
// SIMD opcode 0x82.
defm Q15MULR_SAT_S : SIMDBinary<I16x8, int_wasm_q15mulr_sat_signed,
                                "q15mulr_sat_s", 0x82>;
1457
1458//===----------------------------------------------------------------------===//
1459// Relaxed swizzle
1460//===----------------------------------------------------------------------===//
1461
// i8x16.relaxed_swizzle: selected from int_wasm_relaxed_swizzle with both the
// source and the mask typed as v16i8. SIMD opcode 0x100.
defm RELAXED_SWIZZLE :
  RELAXED_I<(outs V128:$dst),
            (ins V128:$src, V128:$mask),
            (outs), (ins),
            [(set (v16i8 V128:$dst),
                  (int_wasm_relaxed_swizzle (v16i8 V128:$src),
                                            (v16i8 V128:$mask)))],
            "i8x16.relaxed_swizzle\t$dst, $src, $mask",
            "i8x16.relaxed_swizzle",
            0x100>;
1467
1468//===----------------------------------------------------------------------===//
1469// Relaxed floating-point to int conversions
1470//===----------------------------------------------------------------------===//
1471
// Unary relaxed conversion.
//   vec    - result vector type; also supplies the mnemonic prefix and the
//            suffix of the generated record name (op#_#vec).
//   arg    - operand vector type.
//   op     - pattern operator the instruction is selected from.
//   name   - mnemonic text following "<vec.prefix>.".
//   simdop - SIMD opcode.
multiclass RelaxedConvert<Vec vec, Vec arg, SDPatternOperator op, string name,
                          bits<32> simdop> {
  defvar mnemonic = vec.prefix#"."#name;
  defm op#_#vec :
    RELAXED_I<(outs V128:$dst), (ins V128:$vec),
              (outs), (ins),
              [(set (vec.vt V128:$dst),
                    (vec.vt (op (arg.vt V128:$vec))))],
              mnemonic#"\t$dst, $vec",
              mnemonic,
              simdop>;
}
1478
// Relaxed truncations producing I32x4, from F32x4 (0x101, 0x102) and from
// F64x2 (0x103, 0x104).
defm "" : RelaxedConvert<I32x4, F32x4,
                         int_wasm_relaxed_trunc_signed,
                         "relaxed_trunc_f32x4_s",
                         0x101>;
defm "" : RelaxedConvert<I32x4, F32x4,
                         int_wasm_relaxed_trunc_unsigned,
                         "relaxed_trunc_f32x4_u",
                         0x102>;
defm "" : RelaxedConvert<I32x4, F64x2,
                         int_wasm_relaxed_trunc_signed_zero,
                         "relaxed_trunc_f64x2_s_zero",
                         0x103>;
defm "" : RelaxedConvert<I32x4, F64x2,
                         int_wasm_relaxed_trunc_unsigned_zero,
                         "relaxed_trunc_f64x2_u_zero",
                         0x104>;
1487
1488//===----------------------------------------------------------------------===//
1489// Relaxed (Negative) Multiply-Add  (madd/nmadd)
1490//===----------------------------------------------------------------------===//
1491
// Three-operand madd / nmadd instruction pair for one vector type.
//   vec     - vector type of the result and of all three operands.
//   simdopA - SIMD opcode of the madd form.
//   simdopS - SIMD opcode of the nmadd form.
//   reqs    - extra feature predicates passed on to SIMD_I.
multiclass SIMDMADD<Vec vec, bits<32> simdopA, bits<32> simdopS,
                    list<Predicate> reqs> {
  defvar maddName = vec.prefix#".relaxed_madd";
  defvar nmaddName = vec.prefix#".relaxed_nmadd";

  defm MADD_#vec :
    SIMD_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c),
           (outs), (ins),
           [(set (vec.vt V128:$dst),
                 (int_wasm_relaxed_madd (vec.vt V128:$a),
                                        (vec.vt V128:$b),
                                        (vec.vt V128:$c)))],
           maddName#"\t$dst, $a, $b, $c",
           maddName, simdopA, reqs>;

  defm NMADD_#vec :
    SIMD_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c),
           (outs), (ins),
           [(set (vec.vt V128:$dst),
                 (int_wasm_relaxed_nmadd (vec.vt V128:$a),
                                         (vec.vt V128:$b),
                                         (vec.vt V128:$c)))],
           nmaddName#"\t$dst, $a, $b, $c",
           nmaddName, simdopS, reqs>;
}
1506
// F32x4 and F64x2 are gated on HasRelaxedSIMD; F16x8 is gated on
// HasHalfPrecision instead.
defm "" : SIMDMADD<F32x4, 0x105, 0x106,
                   [HasRelaxedSIMD]>;
defm "" : SIMDMADD<F64x2, 0x107, 0x108,
                   [HasRelaxedSIMD]>;
defm "" : SIMDMADD<F16x8, 0x146, 0x147,
                   [HasHalfPrecision]>;
1510
1511//===----------------------------------------------------------------------===//
1512// Laneselect
1513//===----------------------------------------------------------------------===//
1514
// Three-operand relaxed laneselect for one vector type.
//   vec - vector type of the result and of all three operands.
//   op  - SIMD opcode.
multiclass SIMDLANESELECT<Vec vec, bits<32> op> {
  defvar mnemonic = vec.prefix#".relaxed_laneselect";
  defm LANESELECT_#vec :
    RELAXED_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c),
              (outs), (ins),
              [(set (vec.vt V128:$dst),
                    (int_wasm_relaxed_laneselect (vec.vt V128:$a),
                                                 (vec.vt V128:$b),
                                                 (vec.vt V128:$c)))],
              mnemonic#"\t$dst, $a, $b, $c",
              mnemonic, op>;
}
1523
// One laneselect instruction per integer vector type, opcodes 0x109-0x10c.
defm "" : SIMDLANESELECT<I8x16,
                         0x109>;
defm "" : SIMDLANESELECT<I16x8,
                         0x10a>;
defm "" : SIMDLANESELECT<I32x4,
                         0x10b>;
defm "" : SIMDLANESELECT<I64x2,
                         0x10c>;
1528
1529//===----------------------------------------------------------------------===//
1530// Relaxed floating-point min and max.
1531//===----------------------------------------------------------------------===//
1532
// Two-operand relaxed instruction for one vector type.
//   vec    - vector type of the result and of both operands; also the suffix
//            appended to the defm name (_#vec).
//   node   - pattern operator the instruction is selected from.
//   name   - mnemonic text following "<vec.prefix>.".
//   simdop - SIMD opcode.
multiclass RelaxedBinary<Vec vec, SDPatternOperator node, string name,
                         bits<32> simdop> {
  defvar mnemonic = vec.prefix#"."#name;
  defm _#vec :
    RELAXED_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs),
              (outs), (ins),
              [(set (vec.vt V128:$dst),
                    (node (vec.vt V128:$lhs), (vec.vt V128:$rhs)))],
              mnemonic#"\t$dst, $lhs, $rhs",
              mnemonic, simdop>;
}
1542
// F32x4 min / max: opcodes 0x10d / 0x10e.
defm SIMD_RELAXED_FMIN : RelaxedBinary<F32x4, int_wasm_relaxed_min,
                                       "relaxed_min", 0x10d>;
defm SIMD_RELAXED_FMAX : RelaxedBinary<F32x4, int_wasm_relaxed_max,
                                       "relaxed_max", 0x10e>;
// F64x2 min / max: opcodes 0x10f / 0x110.
defm SIMD_RELAXED_FMIN : RelaxedBinary<F64x2, int_wasm_relaxed_min,
                                       "relaxed_min", 0x10f>;
defm SIMD_RELAXED_FMAX : RelaxedBinary<F64x2, int_wasm_relaxed_max,
                                       "relaxed_max", 0x110>;
1551
1552//===----------------------------------------------------------------------===//
1553// Relaxed rounding q15 multiplication
1554//===----------------------------------------------------------------------===//
1555
// I16x8 relaxed q15mulr, selected from int_wasm_relaxed_q15mulr_signed.
// SIMD opcode 0x111.
defm RELAXED_Q15MULR_S : RelaxedBinary<I16x8,
                                       int_wasm_relaxed_q15mulr_signed,
                                       "relaxed_q15mulr_s",
                                       0x111>;
1559
1560//===----------------------------------------------------------------------===//
1561// Relaxed integer dot product
1562//===----------------------------------------------------------------------===//
1563
// i16x8.relaxed_dot_i8x16_i7x16_s: two v16i8 operands, v8i16 result.
// SIMD opcode 0x112.
defm RELAXED_DOT :
  RELAXED_I<(outs V128:$dst),
            (ins V128:$lhs, V128:$rhs),
            (outs), (ins),
            [(set (v8i16 V128:$dst),
                  (int_wasm_relaxed_dot_i8x16_i7x16_signed
                    (v16i8 V128:$lhs), (v16i8 V128:$rhs)))],
            "i16x8.relaxed_dot_i8x16_i7x16_s\t$dst, $lhs, $rhs",
            "i16x8.relaxed_dot_i8x16_i7x16_s",
            0x112>;
1570
// i32x4.relaxed_dot_i8x16_i7x16_add_s: two v16i8 operands plus a v4i32
// accumulator operand, v4i32 result. SIMD opcode 0x113.
defm RELAXED_DOT_ADD :
  RELAXED_I<(outs V128:$dst),
            (ins V128:$lhs, V128:$rhs, V128:$acc),
            (outs), (ins),
            [(set (v4i32 V128:$dst),
                  (int_wasm_relaxed_dot_i8x16_i7x16_add_signed
                    (v16i8 V128:$lhs), (v16i8 V128:$rhs),
                    (v4i32 V128:$acc)))],
            "i32x4.relaxed_dot_i8x16_i7x16_add_s\t$dst, $lhs, $rhs, $acc",
            "i32x4.relaxed_dot_i8x16_i7x16_add_s",
            0x113>;
1578
1579//===----------------------------------------------------------------------===//
1580// Relaxed BFloat16 dot product
1581//===----------------------------------------------------------------------===//
1582
// f32x4.relaxed_dot_bf16x8_add_f32: two operands typed as v8i16 plus a v4f32
// accumulator operand, v4f32 result. SIMD opcode 0x114.
defm RELAXED_DOT_BFLOAT :
  RELAXED_I<(outs V128:$dst),
            (ins V128:$lhs, V128:$rhs, V128:$acc),
            (outs), (ins),
            [(set (v4f32 V128:$dst),
                  (int_wasm_relaxed_dot_bf16x8_add_f32
                    (v8i16 V128:$lhs), (v8i16 V128:$rhs),
                    (v4f32 V128:$acc)))],
            "f32x4.relaxed_dot_bf16x8_add_f32\t$dst, $lhs, $rhs, $acc",
            "f32x4.relaxed_dot_bf16x8_add_f32",
            0x114>;
1590