// WebAssemblyInstrSIMD.td - WebAssembly SIMD codegen support -*- tablegen -*-//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// WebAssembly SIMD operand code-gen constructs.
///
//===----------------------------------------------------------------------===//

// Common base for every instruction that lives behind the 0xfd SIMD prefix.
// The prefix is merged into the opcode here: sub-opcodes below 0x100 become
// 0xfd00 | (simdop & 0xff), larger ones become 0xfd0000 | (simdop & 0xffff).
// `reqs` is handed to Requires<> unchanged.
multiclass ABSTRACT_SIMD_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s,
                           list<dag> pattern_r, string asmstr_r,
                           string asmstr_s, bits<32> simdop,
                           list<Predicate> reqs> {
  defm "" : I<oops_r, iops_r, oops_s, iops_s, pattern_r, asmstr_r, asmstr_s,
              !if(!lt(simdop, 0x100),
                  !or(0xfd00, !and(0xff, simdop)),
                  !or(0xfd0000, !and(0xffff, simdop)))>,
            Requires<reqs>;
}

// Instructions predicated on HasSIMD128; any extra predicates in `reqs` are
// appended after it.
multiclass SIMD_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s,
                  list<dag> pattern_r, string asmstr_r = "",
                  string asmstr_s = "", bits<32> simdop = -1,
                  list<Predicate> reqs = []> {
  defm "" : ABSTRACT_SIMD_I<oops_r, iops_r, oops_s, iops_s, pattern_r, asmstr_r,
                            asmstr_s, simdop,
                            !listconcat([HasSIMD128], reqs)>;
}

// Instructions predicated on HasRelaxedSIMD only.
multiclass RELAXED_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s,
                     list<dag> pattern_r, string asmstr_r = "",
                     string asmstr_s = "", bits<32> simdop = -1> {
  defm "" : ABSTRACT_SIMD_I<oops_r, iops_r, oops_s, iops_s, pattern_r, asmstr_r,
                            asmstr_s, simdop, [HasRelaxedSIMD]>;
}

// Instructions predicated on HasHalfPrecision only.
multiclass HALF_PRECISION_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s,
                            list<dag> pattern_r, string asmstr_r = "",
                            string asmstr_s = "", bits<32> simdop = -1> {
  defm "" : ABSTRACT_SIMD_I<oops_r, iops_r, oops_s, iops_s, pattern_r, asmstr_r,
                            asmstr_s, simdop, [HasHalfPrecision]>;
}


// One ARGUMENT instantiation per vector value type carried in V128.
foreach vt = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, v8f16] in
defm "" : ARGUMENT<V128, vt>;

// Constrained immediate argument types. Allow any value from the minimum signed
// value to the maximum unsigned value for the lane size.
foreach SIZE = [8, 16] in
def ImmI#SIZE : ImmLeaf<i32,
  // -2^(n-1) <= Imm < 2^n
  "return -(1 << ("#SIZE#" - 1)) <= Imm && Imm < (1 << "#SIZE#");"
>;

// Lane-index immediates: LaneIdxN accepts 0 <= Imm < N.
foreach SIZE = [2, 4, 8, 16, 32] in
def LaneIdx#SIZE : ImmLeaf<i32, "return 0 <= Imm && Imm < "#SIZE#";">;

// Description of one SIMD vector shape, used to parameterize the multiclasses
// and patterns in this file.
class Vec {
  // Vector value type of this shape.
  ValueType vt;
  // Integer vector type with the same lane layout (equal to `vt` for integer
  // shapes).
  ValueType int_vt;
  // Scalar value type used for a single lane in patterns.
  ValueType lane_vt;
  // Register class holding a single lane value.
  WebAssemblyRegClass lane_rc;
  // Width of one lane in bits.
  int lane_bits;
  // Immediate leaf restricting lane indices to this shape's lane count.
  ImmLeaf lane_idx;
  // Load operator producing a single lane value.
  SDPatternOperator lane_load;
  // Fragment matching a splat of a scalar to this shape.
  PatFrag splat;
  // Assembly mnemonic prefix, e.g. "i8x16".
  string prefix;
  // Shape with lanes half as wide; only set for I16x8, I32x4 and I64x2.
  Vec split;
}

def I8x16 : Vec {
  let vt = v16i8;
  let int_vt = v16i8;
  let lane_vt = i32;
  let lane_rc = I32;
  let lane_bits = 8;
  let lane_idx = LaneIdx16;
  let lane_load = extloadi8;
  let splat = PatFrag<(ops node:$x), (v16i8 (splat_vector (i8 $x)))>;
  let prefix = "i8x16";
}

def I16x8 : Vec {
  let vt = v8i16;
  let int_vt = v8i16;
  let lane_vt = i32;
  let lane_rc = I32;
  let lane_bits = 16;
  let lane_idx = LaneIdx8;
  let lane_load = extloadi16;
  let splat = PatFrag<(ops node:$x), (v8i16 (splat_vector (i16 $x)))>;
  let prefix = "i16x8";
  let split = I8x16;
}

def I32x4 : Vec {
  let vt = v4i32;
  let int_vt = v4i32;
  let lane_vt = i32;
  let lane_rc = I32;
  let lane_bits = 32;
  let lane_idx = LaneIdx4;
  let lane_load = load;
  let splat = PatFrag<(ops node:$x), (v4i32 (splat_vector (i32 $x)))>;
  let prefix = "i32x4";
  let split = I16x8;
}

def I64x2 : Vec {
  let vt = v2i64;
  let int_vt = v2i64;
  let lane_vt = i64;
  let lane_rc = I64;
  let lane_bits = 64;
  let lane_idx = LaneIdx2;
  let lane_load = load;
  let splat = PatFrag<(ops node:$x), (v2i64 (splat_vector (i64 $x)))>;
  let prefix = "i64x2";
  let split = I32x4;
}

def F32x4 : Vec {
  let vt = v4f32;
  let int_vt = v4i32;
  let lane_vt = f32;
  let lane_rc = F32;
  let lane_bits = 32;
  let lane_idx = LaneIdx4;
  let lane_load = load;
  let splat = PatFrag<(ops node:$x), (v4f32 (splat_vector (f32 $x)))>;
  let prefix = "f32x4";
}

def F64x2 : Vec {
  let vt = v2f64;
  let int_vt = v2i64;
  let lane_vt = f64;
  let lane_rc = F64;
  let lane_bits = 64;
  let lane_idx = LaneIdx2;
  let lane_load = load;
  let splat = PatFrag<(ops node:$x), (v2f64 (splat_vector (f64 $x)))>;
  let prefix = "f64x2";
}

// Half-precision lanes are carried as f32 scalars (lane_vt / lane_rc) and
// loaded through the int_wasm_loadf16_f32 intrinsic.
def F16x8 : Vec {
  let vt = v8f16;
  let int_vt = v8i16;
  let lane_vt = f32;
  let lane_rc = F32;
  let lane_bits = 16;
  let lane_idx = LaneIdx8;
  let lane_load = int_wasm_loadf16_f32;
  let splat = PatFrag<(ops node:$x), (v8f16 (splat_vector (f16 $x)))>;
  let prefix = "f16x8";
}

// TODO: Include F16x8 here when half precision is better supported.
defvar AllVecs = [I8x16, I16x8, I32x4, I64x2, F32x4, F64x2];
defvar IntVecs = [I8x16, I16x8, I32x4, I64x2];

//===----------------------------------------------------------------------===//
// Load and store
//===----------------------------------------------------------------------===//

// Load: v128.load
// One instruction per address width (32-bit and 64-bit addressing).
let mayLoad = 1, UseNamedOperandTable = 1 in {
defm LOAD_V128_A32 :
  SIMD_I<(outs V128:$dst),
         (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
         (outs), (ins P2Align:$p2align, offset32_op:$off), [],
         "v128.load\t$dst, ${off}(${addr})$p2align",
         "v128.load\t$off$p2align", 0>;
defm LOAD_V128_A64 :
  SIMD_I<(outs V128:$dst),
         (ins P2Align:$p2align, offset64_op:$off, I64:$addr),
         (outs), (ins P2Align:$p2align, offset64_op:$off), [],
         "v128.load\t$dst, ${off}(${addr})$p2align",
         "v128.load\t$off$p2align", 0>;
}

// Def load patterns from WebAssemblyInstrMemory.td for vector types
foreach vec = AllVecs in {
defm : LoadPat<vec.vt, load, "LOAD_V128">;
}

// v128.loadX_splat
// `size` is the lane width in bits and becomes part of both the record name
// and the mnemonic.
multiclass SIMDLoadSplat<int size, bits<32> simdop> {
  defvar mnemonic = "v128.load"#size#"_splat";
  let mayLoad = 1, UseNamedOperandTable = 1 in {
  defm LOAD#size#_SPLAT_A32 :
    SIMD_I<(outs V128:$dst),
           (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
           (outs),
           (ins P2Align:$p2align, offset32_op:$off), [],
           mnemonic#"\t$dst, ${off}(${addr})$p2align",
           mnemonic#"\t$off$p2align", simdop>;
  defm LOAD#size#_SPLAT_A64 :
    SIMD_I<(outs V128:$dst),
           (ins P2Align:$p2align, offset64_op:$off, I64:$addr),
           (outs),
           (ins P2Align:$p2align, offset64_op:$off), [],
           mnemonic#"\t$dst, ${off}(${addr})$p2align",
           mnemonic#"\t$off$p2align", simdop>;
  }
}

defm "" : SIMDLoadSplat<8, 7>;
defm "" : SIMDLoadSplat<16, 8>;
defm "" : SIMDLoadSplat<32, 9>;
defm "" : SIMDLoadSplat<64, 10>;

// Select a splat of a freshly loaded lane as the load-splat instruction whose
// width matches the shape's lane_bits.
foreach vec = AllVecs in {
  defvar opcode = "LOAD"#vec.lane_bits#"_SPLAT";
  defvar frag =
    PatFrag<(ops node:$addr),
            (splat_vector (vec.lane_vt (vec.lane_load node:$addr)))>;
  defm : LoadPat<vec.vt, frag, opcode>;
}

// Load and extend
// Defines the signed variant at `simdop` and the unsigned variant at
// `simdop + 1`, each for 32-bit and 64-bit addressing.
multiclass SIMDLoadExtend<Vec vec, string loadPat, bits<32> simdop> {
  defvar asm_s = vec.prefix#".load"#loadPat#"_s";
  defvar asm_u = vec.prefix#".load"#loadPat#"_u";
  let mayLoad = 1, UseNamedOperandTable = 1 in {
  defm LOAD_EXTEND_S_#vec#_A32 :
    SIMD_I<(outs V128:$dst),
           (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
           (outs), (ins P2Align:$p2align, offset32_op:$off), [],
           asm_s#"\t$dst, ${off}(${addr})$p2align",
           asm_s#"\t$off$p2align", simdop>;
  defm LOAD_EXTEND_U_#vec#_A32 :
    SIMD_I<(outs V128:$dst),
           (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
           (outs), (ins P2Align:$p2align, offset32_op:$off), [],
           asm_u#"\t$dst, ${off}(${addr})$p2align",
           asm_u#"\t$off$p2align", !add(simdop, 1)>;
  defm LOAD_EXTEND_S_#vec#_A64 :
    SIMD_I<(outs V128:$dst),
           (ins P2Align:$p2align, offset64_op:$off, I64:$addr),
           (outs), (ins P2Align:$p2align, offset64_op:$off), [],
           asm_s#"\t$dst, ${off}(${addr})$p2align",
           asm_s#"\t$off$p2align", simdop>;
  defm LOAD_EXTEND_U_#vec#_A64 :
    SIMD_I<(outs V128:$dst),
           (ins P2Align:$p2align, offset64_op:$off, I64:$addr),
           (outs), (ins P2Align:$p2align, offset64_op:$off), [],
           asm_u#"\t$dst, ${off}(${addr})$p2align",
           asm_u#"\t$off$p2align", !add(simdop, 1)>;
  }
}

defm "" : SIMDLoadExtend<I16x8, "8x8", 1>;
defm "" : SIMDLoadExtend<I32x4, "16x4", 3>;
defm "" : SIMDLoadExtend<I64x2, "32x2", 5>;

// Map the extending vector-load fragments onto the instructions above. The
// fragment name is suffixed with the lane width of the narrower (`split`)
// shape; any-extending loads are selected as the unsigned variant.
foreach vec = [I16x8, I32x4, I64x2] in
foreach exts = [["sextloadvi", "_S"],
                ["zextloadvi", "_U"],
                ["extloadvi", "_U"]] in {
defvar frag = !cast<PatFrag>(exts[0]#vec.split.lane_bits);
defvar opcode = "LOAD_EXTEND"#exts[1]#"_"#vec;
defm : LoadPat<vec.vt, frag, opcode>;
}

// Load lane into zero vector
// v128.loadN_zero for 32-bit and 64-bit addressing, where N is the shape's
// lane width.
multiclass SIMDLoadZero<Vec vec, bits<32> simdop> {
  defvar mnemonic = "v128.load"#vec.lane_bits#"_zero";
  let mayLoad = 1, UseNamedOperandTable = 1 in {
  defm LOAD_ZERO_#vec#_A32 :
    SIMD_I<(outs V128:$dst),
           (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
           (outs), (ins P2Align:$p2align, offset32_op:$off), [],
           mnemonic#"\t$dst, ${off}(${addr})$p2align",
           mnemonic#"\t$off$p2align", simdop>;
  defm LOAD_ZERO_#vec#_A64 :
    SIMD_I<(outs V128:$dst),
           (ins P2Align:$p2align, offset64_op:$off, I64:$addr),
           (outs), (ins P2Align:$p2align, offset64_op:$off), [],
           mnemonic#"\t$dst, ${off}(${addr})$p2align",
           mnemonic#"\t$off$p2align", simdop>;
  } // mayLoad = 1, UseNamedOperandTable = 1
}

defm "" : SIMDLoadZero<I32x4, 0x5c>;
defm "" : SIMDLoadZero<I64x2, 0x5d>;

// Use load_zero to load scalars into vectors as well where possible.
// TODO: i16, and i8 scalars
foreach vec = [I32x4, I64x2] in {
  defvar opcode = "LOAD_ZERO_"#vec;
  defvar frag = PatFrag<(ops node:$addr),
                        (scalar_to_vector (vec.lane_vt (load $addr)))>;
  defm : LoadPat<vec.vt, frag, opcode>;
}

// Inserting a loaded scalar into lane 0 of an all-zero splat is also a
// load_zero.
// TODO: f32x4 and f64x2 as well
foreach vec = [I32x4, I64x2] in {
  defvar opcode = "LOAD_ZERO_"#vec;
  defvar frag = PatFrag<(ops node:$ptr),
    (vector_insert (vec.splat (vec.lane_vt 0)), (vec.lane_vt (load $ptr)), 0)>;
  defm : LoadPat<vec.vt, frag, opcode>;
}

// Load lane
// v128.loadN_lane takes the vector to update in addition to the address; the
// lane index is an immediate operand.
multiclass SIMDLoadLane<Vec vec, bits<32> simdop> {
  defvar mnemonic = "v128.load"#vec.lane_bits#"_lane";
  let mayLoad = 1, UseNamedOperandTable = 1 in {
  defm LOAD_LANE_#vec#_A32 :
    SIMD_I<(outs V128:$dst),
           (ins P2Align:$p2align, offset32_op:$off, vec_i8imm_op:$idx,
                I32:$addr, V128:$vec),
           (outs), (ins P2Align:$p2align, offset32_op:$off, vec_i8imm_op:$idx),
           [], mnemonic#"\t$dst, ${off}(${addr})$p2align, $vec, $idx",
           mnemonic#"\t$off$p2align, $idx", simdop>;
  defm LOAD_LANE_#vec#_A64 :
    SIMD_I<(outs V128:$dst),
           (ins P2Align:$p2align, offset64_op:$off, vec_i8imm_op:$idx,
                I64:$addr, V128:$vec),
           (outs), (ins P2Align:$p2align, offset64_op:$off, vec_i8imm_op:$idx),
           [], mnemonic#"\t$dst, ${off}(${addr})$p2align, $vec, $idx",
           mnemonic#"\t$off$p2align, $idx", simdop>;
  } // mayLoad = 1, UseNamedOperandTable = 1
}

defm "" : SIMDLoadLane<I8x16, 0x54>;
defm "" : SIMDLoadLane<I16x8, 0x55>;
defm "" : SIMDLoadLane<I32x4, 0x56>;
defm "" : SIMDLoadLane<I64x2, 0x57>;

// Select loads with no constant offset.
// Both the p2align and offset operands of the selected instruction are 0.
multiclass LoadLanePatNoOffset<Vec vec, SDPatternOperator kind> {
  defvar inst_a32 = !cast<NI>("LOAD_LANE_"#vec#"_A32");
  defvar inst_a64 = !cast<NI>("LOAD_LANE_"#vec#"_A64");
  def : Pat<(vec.vt (kind (i32 I32:$addr),
                          (vec.vt V128:$vec), (i32 vec.lane_idx:$idx))),
            (inst_a32 0, 0, imm:$idx, $addr, $vec)>,
        Requires<[HasAddr32]>;
  def : Pat<(vec.vt (kind (i64 I64:$addr),
                          (vec.vt V128:$vec), (i32 vec.lane_idx:$idx))),
            (inst_a64 0, 0, imm:$idx, $addr, $vec)>,
        Requires<[HasAddr64]>;
}

// Fragments describing "load a scalar and insert it at lane $idx of $vec".
def load8_lane :
  PatFrag<(ops node:$ptr, node:$vec, node:$idx),
          (vector_insert $vec, (i32 (extloadi8 $ptr)), $idx)>;
def load16_lane :
  PatFrag<(ops node:$ptr, node:$vec, node:$idx),
          (vector_insert $vec, (i32 (extloadi16 $ptr)), $idx)>;
def load32_lane :
  PatFrag<(ops node:$ptr, node:$vec, node:$idx),
          (vector_insert $vec, (i32 (load $ptr)), $idx)>;
def load64_lane :
  PatFrag<(ops node:$ptr, node:$vec, node:$idx),
          (vector_insert $vec, (i64 (load $ptr)), $idx)>;
// TODO: floating point lanes as well

defm : LoadLanePatNoOffset<I8x16, load8_lane>;
defm : LoadLanePatNoOffset<I16x8, load16_lane>;
defm : LoadLanePatNoOffset<I32x4, load32_lane>;
defm : LoadLanePatNoOffset<I64x2, load64_lane>;

// TODO: Also support the other load patterns for load_lane once the instructions
// are merged to the proposal.

// Store: v128.store
// One instruction per address width (32-bit and 64-bit addressing).
let mayStore = 1, UseNamedOperandTable = 1 in {
defm STORE_V128_A32 :
  SIMD_I<(outs),
         (ins P2Align:$p2align, offset32_op:$off, I32:$addr, V128:$vec),
         (outs), (ins P2Align:$p2align, offset32_op:$off), [],
         "v128.store\t${off}(${addr})$p2align, $vec",
         "v128.store\t$off$p2align", 11>;
defm STORE_V128_A64 :
  SIMD_I<(outs),
         (ins P2Align:$p2align, offset64_op:$off, I64:$addr, V128:$vec),
         (outs), (ins P2Align:$p2align, offset64_op:$off), [],
         "v128.store\t${off}(${addr})$p2align, $vec",
         "v128.store\t$off$p2align", 11>;
}

// Def store patterns from WebAssemblyInstrMemory.td for vector types
foreach vec = AllVecs in {
defm : StorePat<vec.vt, store, "STORE_V128">;
}

// Store lane
// v128.storeN_lane, where N is the shape's lane width; the lane index is an
// immediate operand.
multiclass SIMDStoreLane<Vec vec, bits<32> simdop> {
  defvar mnemonic = "v128.store"#vec.lane_bits#"_lane";
  let mayStore = 1, UseNamedOperandTable = 1 in {
  defm STORE_LANE_#vec#_A32 :
    SIMD_I<(outs),
           (ins P2Align:$p2align, offset32_op:$off, vec_i8imm_op:$idx,
                I32:$addr, V128:$vec),
           (outs), (ins P2Align:$p2align, offset32_op:$off, vec_i8imm_op:$idx),
           [], mnemonic#"\t${off}(${addr})$p2align, $vec, $idx",
           mnemonic#"\t$off$p2align, $idx", simdop>;
  defm STORE_LANE_#vec#_A64 :
    SIMD_I<(outs),
           (ins P2Align:$p2align, offset64_op:$off, vec_i8imm_op:$idx,
                I64:$addr, V128:$vec),
           (outs), (ins P2Align:$p2align, offset64_op:$off, vec_i8imm_op:$idx),
           [], mnemonic#"\t${off}(${addr})$p2align, $vec, $idx",
           mnemonic#"\t$off$p2align, $idx", simdop>;
  } // mayStore = 1, UseNamedOperandTable = 1
}

defm "" : SIMDStoreLane<I8x16, 0x58>;
defm "" : SIMDStoreLane<I16x8, 0x59>;
defm "" : SIMDStoreLane<I32x4, 0x5a>;
defm "" : SIMDStoreLane<I64x2, 0x5b>;

// Select a lane store, folding the address operands matched by AddrOps32 /
// AddrOps64 into the instruction's offset and address. The p2align operand
// is 0.
multiclass StoreLanePat<Vec vec, SDPatternOperator kind> {
  defvar inst_a32 = !cast<NI>("STORE_LANE_"#vec#"_A32");
  defvar inst_a64 = !cast<NI>("STORE_LANE_"#vec#"_A64");
  def : Pat<(kind (AddrOps32 offset32_op:$offset, I32:$addr),
                  (vec.vt V128:$vec),
                  (i32 vec.lane_idx:$idx)),
            (inst_a32 0, $offset, imm:$idx, $addr, $vec)>,
        Requires<[HasAddr32]>;
  def : Pat<(kind (AddrOps64 offset64_op:$offset, I64:$addr),
                  (vec.vt V128:$vec),
                  (i32 vec.lane_idx:$idx)),
            (inst_a64 0, $offset, imm:$idx, $addr, $vec)>,
        Requires<[HasAddr64]>;
}

// Fragments describing "extract lane $idx of $vec and store it to $ptr".
def store8_lane :
  PatFrag<(ops node:$ptr, node:$vec, node:$idx),
          (truncstorei8 (i32 (vector_extract $vec, $idx)), $ptr)>;
def store16_lane :
  PatFrag<(ops node:$ptr, node:$vec, node:$idx),
          (truncstorei16 (i32 (vector_extract $vec, $idx)), $ptr)>;
def store32_lane :
  PatFrag<(ops node:$ptr, node:$vec, node:$idx),
          (store (i32 (vector_extract $vec, $idx)), $ptr)>;
def store64_lane :
  PatFrag<(ops node:$ptr, node:$vec, node:$idx),
          (store (i64 (vector_extract $vec, $idx)), $ptr)>;
// TODO: floating point lanes as well

let AddedComplexity = 1 in {
defm : StoreLanePat<I8x16, store8_lane>;
defm : StoreLanePat<I16x8, store16_lane>;
defm : StoreLanePat<I32x4, store32_lane>;
defm : StoreLanePat<I64x2, store64_lane>;
}

//===----------------------------------------------------------------------===//
// Constructing SIMD values
//===----------------------------------------------------------------------===//

// Constant: v128.const
// `ops` is the immediate operand list (shared by the register and stack
// forms), `pat` the build_vector being matched and `args` the operand part of
// the assembly string.
multiclass ConstVec<Vec vec, dag ops, dag pat, string args> {
  let isMoveImm = 1, isReMaterializable = 1 in
  defm CONST_V128_#vec : SIMD_I<(outs V128:$dst), ops, (outs), ops,
                                [(set V128:$dst, (vec.vt pat))],
                                "v128.const\t$dst, "#args,
                                "v128.const\t"#args, 12>;
}

defm "" : ConstVec<I8x16,
                   (ins vec_i8imm_op:$i0, vec_i8imm_op:$i1,
                        vec_i8imm_op:$i2, vec_i8imm_op:$i3,
                        vec_i8imm_op:$i4, vec_i8imm_op:$i5,
                        vec_i8imm_op:$i6, vec_i8imm_op:$i7,
                        vec_i8imm_op:$i8, vec_i8imm_op:$i9,
                        vec_i8imm_op:$iA, vec_i8imm_op:$iB,
                        vec_i8imm_op:$iC, vec_i8imm_op:$iD,
                        vec_i8imm_op:$iE, vec_i8imm_op:$iF),
                   (build_vector ImmI8:$i0, ImmI8:$i1, ImmI8:$i2, ImmI8:$i3,
                                 ImmI8:$i4, ImmI8:$i5, ImmI8:$i6, ImmI8:$i7,
                                 ImmI8:$i8, ImmI8:$i9, ImmI8:$iA, ImmI8:$iB,
                                 ImmI8:$iC, ImmI8:$iD, ImmI8:$iE, ImmI8:$iF),
                   !strconcat("$i0, $i1, $i2, $i3, $i4, $i5, $i6, $i7, ",
                              "$i8, $i9, $iA, $iB, $iC, $iD, $iE, $iF")>;
defm "" : ConstVec<I16x8,
                   (ins vec_i16imm_op:$i0, vec_i16imm_op:$i1,
                        vec_i16imm_op:$i2, vec_i16imm_op:$i3,
                        vec_i16imm_op:$i4, vec_i16imm_op:$i5,
                        vec_i16imm_op:$i6, vec_i16imm_op:$i7),
                   (build_vector
                     ImmI16:$i0, ImmI16:$i1, ImmI16:$i2, ImmI16:$i3,
                     ImmI16:$i4, ImmI16:$i5, ImmI16:$i6, ImmI16:$i7),
                   "$i0, $i1, $i2, $i3, $i4, $i5, $i6, $i7">;
// Only the i32x4 form is marked IsCanonical.
let IsCanonical = 1 in
defm "" : ConstVec<I32x4,
                   (ins vec_i32imm_op:$i0, vec_i32imm_op:$i1,
                        vec_i32imm_op:$i2, vec_i32imm_op:$i3),
                   (build_vector (i32 imm:$i0), (i32 imm:$i1),
                                 (i32 imm:$i2), (i32 imm:$i3)),
                   "$i0, $i1, $i2, $i3">;
defm "" : ConstVec<I64x2,
                   (ins vec_i64imm_op:$i0, vec_i64imm_op:$i1),
                   (build_vector (i64 imm:$i0), (i64 imm:$i1)),
                   "$i0, $i1">;
defm "" : ConstVec<F32x4,
                   (ins f32imm_op:$i0, f32imm_op:$i1,
                        f32imm_op:$i2, f32imm_op:$i3),
                   (build_vector (f32 fpimm:$i0), (f32 fpimm:$i1),
                                 (f32 fpimm:$i2), (f32 fpimm:$i3)),
                   "$i0, $i1, $i2, $i3">;
defm "" : ConstVec<F64x2,
                   (ins f64imm_op:$i0, f64imm_op:$i1),
                   (build_vector (f64 fpimm:$i0), (f64 fpimm:$i1)),
                   "$i0, $i1">;

// Match splat(x) -> const.v128(x, ..., x)
// The result dag repeats the immediate once per lane; the lane count is the
// vector size divided by the lane width.
foreach vec = AllVecs in {
  defvar laneCount = !div(vec.vt.Size, vec.lane_bits);
  defvar fpLanes = !or(!eq(vec.lane_vt, f32), !eq(vec.lane_vt, f64));
  defvar immNode = !if(fpLanes, fpimm, imm);
  def : Pat<(vec.splat (vec.lane_vt immNode:$x)),
            !dag(!cast<NI>("CONST_V128_"#vec),
                 !listsplat((vec.lane_vt immNode:$x), laneCount),
                 ?)>;
}

// Shuffle lanes: shuffle
// Two vector inputs followed by sixteen immediate byte-lane selectors.
defm SHUFFLE :
  SIMD_I<(outs V128:$dst),
         (ins V128:$x, V128:$y,
           vec_i8imm_op:$m0, vec_i8imm_op:$m1,
           vec_i8imm_op:$m2, vec_i8imm_op:$m3,
           vec_i8imm_op:$m4, vec_i8imm_op:$m5,
           vec_i8imm_op:$m6, vec_i8imm_op:$m7,
           vec_i8imm_op:$m8, vec_i8imm_op:$m9,
           vec_i8imm_op:$mA, vec_i8imm_op:$mB,
           vec_i8imm_op:$mC, vec_i8imm_op:$mD,
           vec_i8imm_op:$mE, vec_i8imm_op:$mF),
         (outs),
         (ins
           vec_i8imm_op:$m0, vec_i8imm_op:$m1,
           vec_i8imm_op:$m2, vec_i8imm_op:$m3,
           vec_i8imm_op:$m4, vec_i8imm_op:$m5,
           vec_i8imm_op:$m6, vec_i8imm_op:$m7,
           vec_i8imm_op:$m8, vec_i8imm_op:$m9,
           vec_i8imm_op:$mA, vec_i8imm_op:$mB,
           vec_i8imm_op:$mC, vec_i8imm_op:$mD,
           vec_i8imm_op:$mE, vec_i8imm_op:$mF),
         [],
         "i8x16.shuffle\t$dst, $x, $y, "#
           "$m0, $m1, $m2, $m3, $m4, $m5, $m6, $m7, "#
           "$m8, $m9, $mA, $mB, $mC, $mD, $mE, $mF",
         "i8x16.shuffle\t"#
           "$m0, $m1, $m2, $m3, $m4, $m5, $m6, $m7, "#
           "$m8, $m9, $mA, $mB, $mC, $mD, $mE, $mF",
         13>;

// Shuffles after custom lowering
def wasm_shuffle_t : SDTypeProfile<1, 18, []>;
def wasm_shuffle : SDNode<"WebAssemblyISD::SHUFFLE", wasm_shuffle_t>;
foreach vec = AllVecs in {
// The @llvm.wasm.shuffle intrinsic has immediate arguments that become
// TargetConstants.
def : Pat<(vec.vt (wasm_shuffle (vec.vt V128:$x), (vec.vt V128:$y),
            (i32 timm:$m0), (i32 timm:$m1),
            (i32 timm:$m2), (i32 timm:$m3),
            (i32 timm:$m4), (i32 timm:$m5),
            (i32 timm:$m6), (i32 timm:$m7),
            (i32 timm:$m8), (i32 timm:$m9),
            (i32 timm:$mA), (i32 timm:$mB),
            (i32 timm:$mC), (i32 timm:$mD),
            (i32 timm:$mE), (i32 timm:$mF))),
          (SHUFFLE $x, $y,
            imm:$m0, imm:$m1, imm:$m2, imm:$m3,
            imm:$m4, imm:$m5, imm:$m6, imm:$m7,
            imm:$m8, imm:$m9, imm:$mA, imm:$mB,
            imm:$mC, imm:$mD, imm:$mE, imm:$mF)>;
// Normal shufflevector instructions may have normal constant arguments.
def : Pat<(vec.vt (wasm_shuffle (vec.vt V128:$x), (vec.vt V128:$y),
            (i32 LaneIdx32:$m0), (i32 LaneIdx32:$m1),
            (i32 LaneIdx32:$m2), (i32 LaneIdx32:$m3),
            (i32 LaneIdx32:$m4), (i32 LaneIdx32:$m5),
            (i32 LaneIdx32:$m6), (i32 LaneIdx32:$m7),
            (i32 LaneIdx32:$m8), (i32 LaneIdx32:$m9),
            (i32 LaneIdx32:$mA), (i32 LaneIdx32:$mB),
            (i32 LaneIdx32:$mC), (i32 LaneIdx32:$mD),
            (i32 LaneIdx32:$mE), (i32 LaneIdx32:$mF))),
          (SHUFFLE $x, $y,
            imm:$m0, imm:$m1, imm:$m2, imm:$m3,
            imm:$m4, imm:$m5, imm:$m6, imm:$m7,
            imm:$m8, imm:$m9, imm:$mA, imm:$mB,
            imm:$mC, imm:$mD, imm:$mE, imm:$mF)>;
}

// Swizzle lanes: i8x16.swizzle
def wasm_swizzle_t : SDTypeProfile<1, 2, []>;
def wasm_swizzle : SDNode<"WebAssemblyISD::SWIZZLE", wasm_swizzle_t>;
defm SWIZZLE :
  SIMD_I<(outs V128:$dst), (ins V128:$src, V128:$mask), (outs), (ins),
         [(set (v16i8 V128:$dst),
           (wasm_swizzle (v16i8 V128:$src), (v16i8 V128:$mask)))],
         "i8x16.swizzle\t$dst, $src, $mask", "i8x16.swizzle", 14>;

// The swizzle intrinsic selects to the same instruction.
def : Pat<(int_wasm_swizzle (v16i8 V128:$src), (v16i8 V128:$mask)),
          (SWIZZLE $src, $mask)>;

// <prefix>.splat: broadcast a scalar held in the shape's lane register class.
multiclass Splat<Vec vec, bits<32> simdop> {
  defm SPLAT_#vec : SIMD_I<(outs V128:$dst), (ins vec.lane_rc:$x),
                           (outs), (ins),
                           [(set (vec.vt V128:$dst),
                             (vec.splat vec.lane_rc:$x))],
                           vec.prefix#".splat\t$dst, $x",
                           vec.prefix#".splat",
                           simdop>;
}

defm "" : Splat<I8x16, 15>;
defm "" : Splat<I16x8, 16>;
defm "" : Splat<I32x4, 17>;
defm "" : Splat<I64x2, 18>;
defm "" : Splat<F32x4, 19>;
defm "" : Splat<F64x2, 20>;

// Half values are not fully supported so an intrinsic is used instead of a
// regular Splat pattern as above.
defm SPLAT_F16x8 :
  HALF_PRECISION_I<(outs V128:$dst), (ins F32:$x),
                   (outs), (ins),
                   [(set (v8f16 V128:$dst), (int_wasm_splat_f16x8 F32:$x))],
                   "f16x8.splat\t$dst, $x", "f16x8.splat", 0x120>;

// scalar_to_vector leaves high lanes undefined, so can be a splat
foreach vec = AllVecs in
def : Pat<(vec.vt (scalar_to_vector (vec.lane_vt vec.lane_rc:$x))),
          (!cast<Instruction>("SPLAT_"#vec) $x)>;

//===----------------------------------------------------------------------===//
// Accessing lanes
//===----------------------------------------------------------------------===//

// Extract lane as a scalar: extract_lane / extract_lane_s / extract_lane_u
// `suffix` is appended to both the record name and the mnemonic. No selection
// pattern is attached here; the patterns follow below.
multiclass ExtractLane<Vec vec, bits<32> simdop, string suffix = ""> {
  defm EXTRACT_LANE_#vec#suffix :
    SIMD_I<(outs vec.lane_rc:$dst), (ins V128:$vec, vec_i8imm_op:$idx),
           (outs), (ins vec_i8imm_op:$idx), [],
           vec.prefix#".extract_lane"#suffix#"\t$dst, $vec, $idx",
           vec.prefix#".extract_lane"#suffix#"\t$idx", simdop>;
}

defm "" : ExtractLane<I8x16, 21, "_s">;
defm "" : ExtractLane<I8x16, 22, "_u">;
defm "" : ExtractLane<I16x8, 24, "_s">;
defm "" : ExtractLane<I16x8, 25, "_u">;
defm "" : ExtractLane<I32x4, 27>;
defm "" : ExtractLane<I64x2, 29>;
defm "" : ExtractLane<F32x4, 31>;
defm "" : ExtractLane<F64x2, 33>;

// Plain extracts. Sub-32-bit integer lanes use the unsigned variant; the
// sign-extending forms are matched separately below.
def : Pat<(vector_extract (v16i8 V128:$vec), (i32 LaneIdx16:$idx)),
          (EXTRACT_LANE_I8x16_u $vec, imm:$idx)>;
def : Pat<(vector_extract (v8i16 V128:$vec), (i32 LaneIdx8:$idx)),
          (EXTRACT_LANE_I16x8_u $vec, imm:$idx)>;
def : Pat<(vector_extract (v4i32 V128:$vec), (i32 LaneIdx4:$idx)),
          (EXTRACT_LANE_I32x4 $vec, imm:$idx)>;
def : Pat<(vector_extract (v4f32 V128:$vec), (i32 LaneIdx4:$idx)),
          (EXTRACT_LANE_F32x4 $vec, imm:$idx)>;
def : Pat<(vector_extract (v2i64 V128:$vec), (i32 LaneIdx2:$idx)),
          (EXTRACT_LANE_I64x2 $vec, imm:$idx)>;
def : Pat<(vector_extract (v2f64 V128:$vec), (i32 LaneIdx2:$idx)),
          (EXTRACT_LANE_F64x2 $vec, imm:$idx)>;

// Fold an explicit sign extension (sext_inreg) or zero extension (and-mask)
// of an extracted narrow lane into the _s / _u instruction.
def : Pat<
  (sext_inreg (vector_extract (v16i8 V128:$vec), (i32 LaneIdx16:$idx)), i8),
  (EXTRACT_LANE_I8x16_s $vec, imm:$idx)>;
def : Pat<
  (and (vector_extract (v16i8 V128:$vec), (i32 LaneIdx16:$idx)), (i32 0xff)),
  (EXTRACT_LANE_I8x16_u $vec, imm:$idx)>;
def : Pat<
  (sext_inreg (vector_extract (v8i16 V128:$vec), (i32 LaneIdx8:$idx)), i16),
  (EXTRACT_LANE_I16x8_s $vec, imm:$idx)>;
def : Pat<
  (and (vector_extract (v8i16 V128:$vec), (i32 LaneIdx8:$idx)), (i32 0xffff)),
  (EXTRACT_LANE_I16x8_u $vec, imm:$idx)>;

// f16x8.extract_lane is selected from an intrinsic and yields the lane as an
// f32. A v8f16 vector has eight lanes, so the index is constrained with
// LaneIdx8 (the same leaf F16x8.lane_idx uses); a wider leaf would let
// indices 8..15 through to an instruction that cannot encode them validly.
defm EXTRACT_LANE_F16x8 :
  HALF_PRECISION_I<(outs F32:$dst), (ins V128:$vec, vec_i8imm_op:$idx),
                   (outs), (ins vec_i8imm_op:$idx),
                   [(set (f32 F32:$dst), (int_wasm_extract_lane_f16x8
                     (v8f16 V128:$vec), (i32 LaneIdx8:$idx)))],
                   "f16x8.extract_lane\t$dst, $vec, $idx",
                   "f16x8.extract_lane\t$idx", 0x121>;

// Replace lane value: replace_lane
// Selected directly from vector_insert with an in-range constant lane index.
multiclass ReplaceLane<Vec vec, bits<32> simdop> {
  defm REPLACE_LANE_#vec :
    SIMD_I<(outs V128:$dst), (ins V128:$vec, vec_i8imm_op:$idx, vec.lane_rc:$x),
           (outs), (ins vec_i8imm_op:$idx),
           [(set V128:$dst, (vector_insert
             (vec.vt V128:$vec),
             (vec.lane_vt vec.lane_rc:$x),
             (i32 vec.lane_idx:$idx)))],
           vec.prefix#".replace_lane\t$dst, $vec, $idx, $x",
           vec.prefix#".replace_lane\t$idx", simdop>;
}

defm "" : ReplaceLane<I8x16, 23>;
defm "" : ReplaceLane<I16x8, 26>;
defm "" : ReplaceLane<I32x4, 28>;
defm "" : ReplaceLane<I64x2, 30>;
defm "" : ReplaceLane<F32x4, 32>;
defm "" : ReplaceLane<F64x2, 34>;

// Lower undef lane indices to zero
def : Pat<(vector_insert (v16i8 V128:$vec), I32:$x, undef),
          (REPLACE_LANE_I8x16 $vec, 0, $x)>;
def : Pat<(vector_insert (v8i16 V128:$vec), I32:$x, undef),
          (REPLACE_LANE_I16x8 $vec, 0, $x)>;
def : Pat<(vector_insert (v4i32 V128:$vec), I32:$x, undef),
          (REPLACE_LANE_I32x4 $vec, 0, $x)>;
def : Pat<(vector_insert (v2i64 V128:$vec), I64:$x, undef),
          (REPLACE_LANE_I64x2 $vec, 0, $x)>;
def : Pat<(vector_insert (v4f32 V128:$vec), F32:$x, undef),
          (REPLACE_LANE_F32x4 $vec, 0, $x)>;
def : Pat<(vector_insert (v2f64 V128:$vec), F64:$x, undef),
          (REPLACE_LANE_F64x2 $vec, 0, $x)>;

//===----------------------------------------------------------------------===//
// Comparisons
//===----------------------------------------------------------------------===//

// A lane-wise comparison for one shape. The result is typed as the shape's
// integer vector type (vec.int_vt); `reqs` adds predicates on top of the ones
// SIMD_I already applies.
multiclass SIMDCondition<Vec vec, string name, CondCode cond, bits<32> simdop,
                         list<Predicate> reqs = []> {
  defm _#vec :
    SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), (outs), (ins),
           [(set (vec.int_vt V128:$dst),
             (setcc (vec.vt V128:$lhs), (vec.vt V128:$rhs), cond))],
           vec.prefix#"."#name#"\t$dst, $lhs, $rhs",
           vec.prefix#"."#name, simdop, reqs>;
}

// Same as SIMDCondition, additionally requiring HasHalfPrecision.
multiclass HalfPrecisionCondition<Vec vec, string name, CondCode cond,
                                  bits<32> simdop> {
  defm "" : SIMDCondition<vec, name, cond, simdop, [HasHalfPrecision]>;
}

// Integer comparisons for i8x16, i16x8 and i32x4. The three opcode groups are
// 10 apart, so `baseInst` is the i8x16 opcode. i64x2 comparisons live
// elsewhere in the opcode space and are instantiated individually below.
multiclass SIMDConditionInt<string name, CondCode cond, bits<32> baseInst> {
  defm "" : SIMDCondition<I8x16, name, cond, baseInst>;
  defm "" : SIMDCondition<I16x8, name, cond, !add(baseInst, 10)>;
  defm "" : SIMDCondition<I32x4, name, cond, !add(baseInst, 20)>;
}

// Floating-point comparisons. `baseInst` is the f32x4 opcode; the f64x2 group
// follows 6 later. The f16x8 comparisons occupy 0x137..0x13c (eq, ne, lt, gt,
// le, ge), which is 246 above the f32x4 group starting at 65 (0x41). They
// must not overlap the f16x8 arithmetic opcodes that start at 0x13d.
multiclass SIMDConditionFP<string name, CondCode cond, bits<32> baseInst> {
  defm "" : SIMDCondition<F32x4, name, cond, baseInst>;
  defm "" : SIMDCondition<F64x2, name, cond, !add(baseInst, 6)>;
  defm "" : HalfPrecisionCondition<F16x8, name, cond, !add(baseInst, 246)>;
}

// Equality: eq
let isCommutable = 1 in {
defm EQ : SIMDConditionInt<"eq", SETEQ, 35>;
defm EQ : SIMDCondition<I64x2, "eq", SETEQ, 214>;
defm EQ : SIMDConditionFP<"eq", SETOEQ, 65>;
} // isCommutable = 1

// Non-equality: ne
let isCommutable = 1 in {
defm NE : SIMDConditionInt<"ne", SETNE, 36>;
defm NE : SIMDCondition<I64x2, "ne", SETNE, 215>;
defm NE : SIMDConditionFP<"ne", SETUNE, 66>;
} // isCommutable = 1

// Less than: lt_s / lt_u / lt
defm LT_S : SIMDConditionInt<"lt_s", SETLT, 37>;
defm LT_S : SIMDCondition<I64x2, "lt_s", SETLT, 216>;
defm LT_U : SIMDConditionInt<"lt_u", SETULT, 38>;
defm LT : SIMDConditionFP<"lt", SETOLT, 67>;

// Greater than: gt_s / gt_u / gt
defm GT_S : SIMDConditionInt<"gt_s", SETGT, 39>;
defm GT_S : SIMDCondition<I64x2, "gt_s", SETGT, 217>;
defm GT_U : SIMDConditionInt<"gt_u", SETUGT, 40>;
defm GT : SIMDConditionFP<"gt", SETOGT, 68>;

// Less than or equal: le_s / le_u / le
defm LE_S : SIMDConditionInt<"le_s", SETLE, 41>;
defm LE_S : SIMDCondition<I64x2, "le_s", SETLE, 218>;
defm LE_U : SIMDConditionInt<"le_u", SETULE, 42>;
defm LE : SIMDConditionFP<"le", SETOLE, 69>;

// Greater than or equal: ge_s / ge_u / ge
defm GE_S : SIMDConditionInt<"ge_s", SETGE, 43>;
defm GE_S : SIMDCondition<I64x2, "ge_s", SETGE, 219>;
defm GE_U : SIMDConditionInt<"ge_u", SETUGE, 44>;
defm GE : SIMDConditionFP<"ge", SETOGE, 70>;

// Lower float comparisons that don't care about NaN to standard WebAssembly
// float comparisons. These instructions are generated with nnan and in the
// target-independent expansion of unordered comparisons and ordered ne.
foreach pair = [[seteq, EQ_F32x4], [setne, NE_F32x4], [setlt, LT_F32x4],
                [setgt, GT_F32x4], [setle, LE_F32x4], [setge, GE_F32x4]] in
def : Pat<(v4i32 (pair[0] (v4f32 V128:$lhs), (v4f32 V128:$rhs))),
          (pair[1] $lhs, $rhs)>;

foreach pair = [[seteq, EQ_F64x2], [setne, NE_F64x2], [setlt, LT_F64x2],
                [setgt, GT_F64x2], [setle, LE_F64x2], [setge, GE_F64x2]] in
def : Pat<(v2i64 (pair[0] (v2f64 V128:$lhs), (v2f64 V128:$rhs))),
          (pair[1] $lhs, $rhs)>;

//===----------------------------------------------------------------------===//
// Bitwise operations
//===----------------------------------------------------------------------===//

// A two-operand lane-wise instruction for one shape, selected from `node`
// applied to two vectors of that shape.
multiclass SIMDBinary<Vec vec, SDPatternOperator node, string name,
                      bits<32> simdop, list<Predicate> reqs = []> {
  defm _#vec : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs),
                      (outs), (ins),
                      [(set (vec.vt V128:$dst),
                        (node (vec.vt V128:$lhs), (vec.vt V128:$rhs)))],
                      vec.prefix#"."#name#"\t$dst, $lhs, $rhs",
                      vec.prefix#"."#name, simdop, reqs>;
}

// Same as SIMDBinary, additionally requiring HasHalfPrecision.
multiclass HalfPrecisionBinary<Vec vec, SDPatternOperator node, string name,
                               bits<32> simdop> {
  defm "" : SIMDBinary<vec, node, name, simdop, [HasHalfPrecision]>;
}

// A shape-agnostic v128.<name> instruction. The instruction itself carries no
// pattern; one selection pattern per integer shape is attached afterwards.
multiclass SIMDBitwise<SDPatternOperator node, string name, bits<32> simdop,
                       bit commutable = false> {
  let isCommutable = commutable in
  defm "" : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs),
                   (outs), (ins), [],
                   "v128."#name#"\t$dst, $lhs, $rhs", "v128."#name, simdop>;
  foreach vec = IntVecs in
  def : Pat<(node (vec.vt V128:$lhs), (vec.vt V128:$rhs)),
            (!cast<NI>(NAME) $lhs, $rhs)>;
}

// A one-operand lane-wise instruction for one shape.
multiclass SIMDUnary<Vec vec, SDPatternOperator node, string name,
                     bits<32> simdop, list<Predicate> reqs = []> {
  defm _#vec : SIMD_I<(outs V128:$dst), (ins V128:$v), (outs), (ins),
                      [(set (vec.vt V128:$dst),
                        (vec.vt (node (vec.vt V128:$v))))],
                      vec.prefix#"."#name#"\t$dst, $v",
                      vec.prefix#"."#name, simdop, reqs>;
}

// Same as SIMDUnary, additionally requiring HasHalfPrecision.
multiclass HalfPrecisionUnary<Vec vec, SDPatternOperator node, string name,
                              bits<32> simdop> {
  defm "" : SIMDUnary<vec, node, name, simdop, [HasHalfPrecision]>;
}

// Bitwise logic: v128.not
defm NOT : SIMD_I<(outs V128:$dst), (ins V128:$v), (outs), (ins), [],
                  "v128.not\t$dst, $v", "v128.not", 77>;
foreach vec = IntVecs in
def : Pat<(vnot (vec.vt V128:$v)), (NOT $v)>;

// Bitwise logic: v128.and / v128.or / v128.xor
defm AND : SIMDBitwise<and, "and", 78, true>;
defm OR : SIMDBitwise<or, "or", 80, true>;
defm XOR : SIMDBitwise<xor, "xor", 81, true>;

// Bitwise logic: v128.andnot
// andnot(left, right) is left & ~right, so it is not commutable.
def andnot : PatFrag<(ops node:$left, node:$right), (and $left, (vnot $right))>;
defm ANDNOT : SIMDBitwise<andnot, "andnot", 79>;

// Bitwise select: v128.bitselect
defm BITSELECT :
  SIMD_I<(outs V128:$dst), (ins V128:$v1, V128:$v2, V128:$c), (outs), (ins), [],
         "v128.bitselect\t$dst, $v1, $v2, $c", "v128.bitselect", 82>;

// The bitselect intrinsic, for every shape.
foreach vec = AllVecs in
def : Pat<(vec.vt (int_wasm_bitselect
            (vec.vt V128:$v1), (vec.vt V128:$v2), (vec.vt V128:$c))),
          (BITSELECT $v1, $v2, $c)>;

// Bitselect is equivalent to (c & v1) | (~c & v2)
foreach vec = IntVecs in
def : Pat<(vec.vt (or (and (vec.vt V128:$c), (vec.vt V128:$v1)),
            (and (vnot V128:$c), (vec.vt V128:$v2)))),
          (BITSELECT $v1, $v2, $c)>;

// Bitselect is also equivalent to ((v1 ^ v2) & c) ^ v2
foreach vec = IntVecs in
def : Pat<(vec.vt (xor (and (xor (vec.vt V128:$v1), (vec.vt V128:$v2)),
                            (vec.vt V128:$c)),
                       (vec.vt V128:$v2))),
          (BITSELECT $v1, $v2, $c)>;

// Same pattern with `c` negated, so `v1` and `v2` trade places in the result.
foreach ivec = IntVecs in
def : Pat<(ivec.vt (xor (and (xor (ivec.vt V128:$v1), (ivec.vt V128:$v2)),
                             (vnot (ivec.vt V128:$c))),
                        (ivec.vt V128:$v2))),
          (BITSELECT $v2, $v1, $c)>;

// vselect is lowered to bitselect as well; its condition uses the integer
// vector type matching the selected type.
foreach avec = AllVecs in
def : Pat<(avec.vt (vselect
            (avec.int_vt V128:$c), (avec.vt V128:$v1), (avec.vt V128:$v2))),
          (BITSELECT $v1, $v2, $c)>;

// MVP select on v128 values
defm SELECT_V128 :
  I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs, I32:$cond),
    (outs), (ins), [],
    "v128.select\t$dst, $lhs, $rhs, $cond", "v128.select", 0x1b>;

foreach vec = AllVecs in {
// Plain scalar-conditioned select.
def : Pat<(select I32:$cond, (vec.vt V128:$lhs), (vec.vt V128:$rhs)),
          (SELECT_V128 $lhs, $rhs, $cond)>;

// ISD::SELECT requires its operand to conform to getBooleanContents, but
// WebAssembly's select interprets any non-zero value as true, so a setne
// against 0 can be folded into the select.
def : Pat<(select
            (i32 (setne I32:$cond, 0)),
            (vec.vt V128:$lhs), (vec.vt V128:$rhs)),
          (SELECT_V128 $lhs, $rhs, $cond)>;

// The same fold for seteq against 0, with the two arms exchanged.
def : Pat<(select
            (i32 (seteq I32:$cond, 0)),
            (vec.vt V128:$lhs), (vec.vt V128:$rhs)),
          (SELECT_V128 $rhs, $lhs, $cond)>;
} // foreach vec

//===----------------------------------------------------------------------===//
// Integer unary arithmetic
//===----------------------------------------------------------------------===//

// Instantiates SIMDUnary for all four integer lane shapes. Opcodes are spaced
// 32 apart, starting at `baseInst` for I8x16.
multiclass SIMDUnaryInt<SDPatternOperator node, string name,
                        bits<32> baseInst> {
  defm "" : SIMDUnary<I8x16, node, name, baseInst>;
  defm "" : SIMDUnary<I16x8, node, name, !add(baseInst, 32)>;
  defm "" : SIMDUnary<I32x4, node, name, !add(baseInst, 64)>;
  defm "" : SIMDUnary<I64x2, node, name, !add(baseInst, 96)>;
}

// Integer vector negation, expressed as (0 - x)
def ivneg : PatFrag<(ops node:$in), (sub immAllZerosV, $in)>;

// Integer absolute value: abs
defm ABS : SIMDUnaryInt<abs, "abs", 96>;

// Integer negation: neg
defm NEG : SIMDUnaryInt<ivneg, "neg", 97>;

// Population count: popcnt (defined for i8x16 only)
defm POPCNT : SIMDUnary<I8x16, ctpop, "popcnt", 0x62>;

// Any lane true: any_true
defm ANYTRUE :
  SIMD_I<(outs I32:$dst), (ins V128:$vec), (outs), (ins), [],
         "v128.any_true\t$dst, $vec", "v128.any_true", 0x53>;

foreach ivec = IntVecs in
def : Pat<(int_wasm_anytrue (ivec.vt V128:$vec)), (ANYTRUE V128:$vec)>;

// All lanes true: all_true
multiclass SIMDAllTrue<Vec vec, bits<32> simdop> {
  defm ALLTRUE_#vec :
    SIMD_I<(outs I32:$dst), (ins V128:$vec), (outs), (ins),
           [(set I32:$dst,
              (i32 (int_wasm_alltrue (vec.vt V128:$vec))))],
           vec.prefix#".all_true\t$dst, $vec",
           vec.prefix#".all_true", simdop>;
}

defm "" : SIMDAllTrue<I8x16, 0x63>;
defm "" : SIMDAllTrue<I16x8, 0x83>;
defm "" : SIMDAllTrue<I32x4, 0xa3>;
defm "" : SIMDAllTrue<I64x2, 0xc3>;

// Reductions already return 0 or 1, so `and 1`, `setne 0`, and `seteq 1`
// applied to their result can be folded out. Each entry below is
// [intrinsic, instruction, vector type], looked up by name.
foreach fold =
  [["int_wasm_anytrue", "ANYTRUE", "I8x16"],
   ["int_wasm_anytrue", "ANYTRUE", "I16x8"],
   ["int_wasm_anytrue", "ANYTRUE", "I32x4"],
   ["int_wasm_anytrue", "ANYTRUE", "I64x2"],
   ["int_wasm_alltrue", "ALLTRUE_I8x16", "I8x16"],
   ["int_wasm_alltrue", "ALLTRUE_I16x8", "I16x8"],
   ["int_wasm_alltrue", "ALLTRUE_I32x4", "I32x4"],
   ["int_wasm_alltrue", "ALLTRUE_I64x2", "I64x2"]] in {
defvar fold_op = !cast<Intrinsic>(fold[0]);
defvar fold_inst = !cast<NI>(fold[1]);
defvar fold_vec = !cast<Vec>(fold[2]);
def : Pat<(i32 (and (i32 (fold_op (fold_vec.vt V128:$x))), (i32 1))),
          (fold_inst $x)>;
def : Pat<(i32 (setne (i32 (fold_op (fold_vec.vt V128:$x))), (i32 0))),
          (fold_inst $x)>;
def : Pat<(i32 (seteq (i32 (fold_op (fold_vec.vt V128:$x))), (i32 1))),
          (fold_inst $x)>;
}

// Bitmask extraction: "<vec.prefix>.bitmask" produces an i32 from a vector.
multiclass SIMDBitmask<Vec vec, bits<32> simdop> {
  defm _#vec :
    SIMD_I<(outs I32:$dst), (ins V128:$vec), (outs), (ins),
           [(set I32:$dst,
              (i32 (int_wasm_bitmask (vec.vt V128:$vec))))],
           vec.prefix#".bitmask\t$dst, $vec", vec.prefix#".bitmask",
           simdop>;
}

defm BITMASK : SIMDBitmask<I8x16, 100>;
defm BITMASK : SIMDBitmask<I16x8, 132>;
defm BITMASK : SIMDBitmask<I32x4, 164>;
defm BITMASK : SIMDBitmask<I64x2, 196>;

//===----------------------------------------------------------------------===//
// Bit shifts
//===----------------------------------------------------------------------===//

// Shift of a `vec.vt` vector by a scalar i32 amount.
multiclass SIMDShift<Vec vec, SDNode node, string name, bits<32> simdop> {
  defm _#vec :
    SIMD_I<(outs V128:$dst), (ins V128:$vec, I32:$x), (outs), (ins),
           [(set (vec.vt V128:$dst), (node V128:$vec, I32:$x))],
           vec.prefix#"."#name#"\t$dst, $vec, $x",
           vec.prefix#"."#name, simdop>;
}

// Instantiates SIMDShift for all four integer lane shapes, with opcodes
// spaced 32 apart starting at `baseInst`.
multiclass SIMDShiftInt<SDNode node, string name, bits<32> baseInst> {
  defm "" : SIMDShift<I8x16, node, name, baseInst>;
  defm "" : SIMDShift<I16x8, node, name, !add(baseInst, 32)>;
  defm "" : SIMDShift<I32x4, node, name, !add(baseInst, 64)>;
  defm "" : SIMDShift<I64x2, node, name, !add(baseInst, 96)>;
}

// WebAssembly SIMD shifts are nonstandard in that the shift amount is
// an i32 rather than a vector, so they need custom nodes.
def wasm_shift_t :
  SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisVT<2, i32>]>;
def wasm_shl : SDNode<"WebAssemblyISD::VEC_SHL", wasm_shift_t>;
def wasm_shr_s : SDNode<"WebAssemblyISD::VEC_SHR_S", wasm_shift_t>;
def wasm_shr_u : SDNode<"WebAssemblyISD::VEC_SHR_U", wasm_shift_t>;

// Left shift by scalar: shl
defm SHL : SIMDShiftInt<wasm_shl, "shl", 107>;

// Right shift by scalar: shr_s / shr_u
defm SHR_S : SIMDShiftInt<wasm_shr_s, "shr_s", 108>;
defm SHR_U : SIMDShiftInt<wasm_shr_u, "shr_u", 109>;

// Optimize away an explicit mask on a shift count. The masks dropped here
// are (lane width - 1): 7, 15, 31 and 63.

// 8-bit lanes
def : Pat<(wasm_shl (v16i8 V128:$lhs), (and I32:$rhs, 7)),
          (SHL_I8x16 V128:$lhs, I32:$rhs)>;
def : Pat<(wasm_shr_s (v16i8 V128:$lhs), (and I32:$rhs, 7)),
          (SHR_S_I8x16 V128:$lhs, I32:$rhs)>;
def : Pat<(wasm_shr_u (v16i8 V128:$lhs), (and I32:$rhs, 7)),
          (SHR_U_I8x16 V128:$lhs, I32:$rhs)>;

// 16-bit lanes
def : Pat<(wasm_shl (v8i16 V128:$lhs), (and I32:$rhs, 15)),
          (SHL_I16x8 V128:$lhs, I32:$rhs)>;
def : Pat<(wasm_shr_s (v8i16 V128:$lhs), (and I32:$rhs, 15)),
          (SHR_S_I16x8 V128:$lhs, I32:$rhs)>;
def : Pat<(wasm_shr_u (v8i16 V128:$lhs), (and I32:$rhs, 15)),
          (SHR_U_I16x8 V128:$lhs, I32:$rhs)>;

// 32-bit lanes
def : Pat<(wasm_shl (v4i32 V128:$lhs), (and I32:$rhs, 31)),
          (SHL_I32x4 V128:$lhs, I32:$rhs)>;
def : Pat<(wasm_shr_s (v4i32 V128:$lhs), (and I32:$rhs, 31)),
          (SHR_S_I32x4 V128:$lhs, I32:$rhs)>;
def : Pat<(wasm_shr_u (v4i32 V128:$lhs), (and I32:$rhs, 31)),
          (SHR_U_I32x4 V128:$lhs, I32:$rhs)>;

// 64-bit lanes, mask applied to an i32 count
def : Pat<(wasm_shl (v2i64 V128:$lhs), (and I32:$rhs, 63)),
          (SHL_I64x2 V128:$lhs, I32:$rhs)>;
def : Pat<(wasm_shr_s (v2i64 V128:$lhs), (and I32:$rhs, 63)),
          (SHR_S_I64x2 V128:$lhs, I32:$rhs)>;
def : Pat<(wasm_shr_u (v2i64 V128:$lhs), (and I32:$rhs, 63)),
          (SHR_U_I64x2 V128:$lhs, I32:$rhs)>;
// 64-bit lanes, mask applied to an i64 count that is then truncated; the
// count is narrowed with I32_WRAP_I64 instead.
def : Pat<(wasm_shl (v2i64 V128:$lhs), (trunc (and I64:$rhs, 63))),
          (SHL_I64x2 V128:$lhs, (I32_WRAP_I64 I64:$rhs))>;
def : Pat<(wasm_shr_s (v2i64 V128:$lhs), (trunc (and I64:$rhs, 63))),
          (SHR_S_I64x2 V128:$lhs, (I32_WRAP_I64 I64:$rhs))>;
def : Pat<(wasm_shr_u (v2i64 V128:$lhs), (trunc (and I64:$rhs, 63))),
          (SHR_U_I64x2 V128:$lhs, (I32_WRAP_I64 I64:$rhs))>;

//===----------------------------------------------------------------------===//
// Integer binary arithmetic
//===----------------------------------------------------------------------===//

// The helpers below instantiate SIMDBinary for different subsets of the
// integer lane shapes. In all of them the opcode is `baseInst` plus 0, 32, 64
// or 96 for I8x16, I16x8, I32x4 and I64x2 respectively.

// I16x8, I32x4 and I64x2 (no I8x16 variant).
multiclass SIMDBinaryIntNoI8x16<SDPatternOperator node, string name,
                                bits<32> baseInst> {
  defm "" : SIMDBinary<I16x8, node, name, !add(baseInst, 32)>;
  defm "" : SIMDBinary<I32x4, node, name, !add(baseInst, 64)>;
  defm "" : SIMDBinary<I64x2, node, name, !add(baseInst, 96)>;
}

// I8x16 and I16x8 only.
multiclass SIMDBinaryIntSmall<SDPatternOperator node, string name,
                              bits<32> baseInst> {
  defm "" : SIMDBinary<I8x16, node, name, baseInst>;
  defm "" : SIMDBinary<I16x8, node, name, !add(baseInst, 32)>;
}

// I8x16, I16x8 and I32x4 (no I64x2 variant).
multiclass SIMDBinaryIntNoI64x2<SDPatternOperator node, string name,
                                bits<32> baseInst> {
  defm "" : SIMDBinaryIntSmall<node, name, baseInst>;
  defm "" : SIMDBinary<I32x4, node, name, !add(baseInst, 64)>;
}

// All four integer lane shapes.
multiclass SIMDBinaryInt<SDPatternOperator node, string name,
                         bits<32> baseInst> {
  defm "" : SIMDBinaryIntNoI64x2<node, name, baseInst>;
  defm "" : SIMDBinary<I64x2, node, name, !add(baseInst, 96)>;
}

// Integer addition: add / add_sat_s / add_sat_u
let isCommutable = 1 in {
defm ADD : SIMDBinaryInt<add, "add", 110>;
defm ADD_SAT_S : SIMDBinaryIntSmall<saddsat, "add_sat_s", 111>;
defm ADD_SAT_U : SIMDBinaryIntSmall<uaddsat, "add_sat_u", 112>;
} // isCommutable = 1

// Integer subtraction: sub / sub_sat_s / sub_sat_u
defm SUB : SIMDBinaryInt<sub, "sub", 113>;
defm SUB_SAT_S :
  SIMDBinaryIntSmall<int_wasm_sub_sat_signed, "sub_sat_s", 114>;
defm SUB_SAT_U :
  SIMDBinaryIntSmall<int_wasm_sub_sat_unsigned, "sub_sat_u", 115>;

// Integer multiplication: mul
let isCommutable = 1 in
defm MUL : SIMDBinaryIntNoI8x16<mul, "mul", 117>;

// Integer min_s / min_u / max_s / max_u
let isCommutable = 1 in {
defm MIN_S : SIMDBinaryIntNoI64x2<smin, "min_s", 118>;
defm MIN_U : SIMDBinaryIntNoI64x2<umin, "min_u", 119>;
defm MAX_S : SIMDBinaryIntNoI64x2<smax, "max_s", 120>;
defm MAX_U : SIMDBinaryIntNoI64x2<umax, "max_u", 121>;
} // isCommutable = 1

// Integer unsigned rounding average: avgr_u
let isCommutable = 1 in {
defm AVGR_U : SIMDBinary<I8x16, int_wasm_avgr_unsigned, "avgr_u", 123>;
defm AVGR_U : SIMDBinary<I16x8, int_wasm_avgr_unsigned, "avgr_u", 155>;
}

// An `add` that carries the no-unsigned-wrap flag.
def add_nuw : PatFrag<(ops node:$lhs, node:$rhs), (add $lhs, $rhs),
                      "return N->getFlags().hasNoUnsignedWrap();">;

// Also select avgr_u for the open-coded form ((lhs + rhs) + 1) >> 1 when both
// additions are flagged no-unsigned-wrap.
foreach vec = [I8x16, I16x8] in {
defvar avgr = !cast<NI>("AVGR_U_"#vec);
def : Pat<(wasm_shr_u
            (add_nuw
              (add_nuw (vec.vt V128:$lhs), (vec.vt V128:$rhs)),
              (vec.splat (i32 1))),
            (i32 1)),
          (avgr $lhs, $rhs)>;
}

// Widening dot product: i32x4.dot_i16x8_s
let isCommutable = 1 in
defm DOT :
  SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), (outs), (ins),
         [(set V128:$dst, (int_wasm_dot V128:$lhs, V128:$rhs))],
         "i32x4.dot_i16x8_s\t$dst, $lhs, $rhs", "i32x4.dot_i16x8_s",
         186>;

// Extending multiplication: extmul_{low,high}_<narrow shape>_{s,u}
def extend_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;
def extend_low_s : SDNode<"WebAssemblyISD::EXTEND_LOW_S", extend_t>;
def extend_high_s : SDNode<"WebAssemblyISD::EXTEND_HIGH_S", extend_t>;
def extend_low_u : SDNode<"WebAssemblyISD::EXTEND_LOW_U", extend_t>;
def extend_high_u : SDNode<"WebAssemblyISD::EXTEND_HIGH_U", extend_t>;

// Binary instruction whose result has type `vec.vt` while both operands have
// the type of `vec.split`.
multiclass SIMDExtBinary<Vec vec, SDPatternOperator node, string name,
                         bits<32> simdop> {
  defm _#vec :
    SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs),
           (outs), (ins),
           [(set (vec.vt V128:$dst),
              (node (vec.split.vt V128:$lhs), (vec.split.vt V128:$rhs)))],
           vec.prefix#"."#name#"\t$dst, $lhs, $rhs",
           vec.prefix#"."#name, simdop>;
}

// Multiplication of two operands that were both widened by `extend`.
class ExtMulPat<SDNode extend> :
  PatFrag<(ops node:$lhs, node:$rhs),
          (mul (extend $lhs), (extend $rhs))>;

def extmul_low_s : ExtMulPat<extend_low_s>;
def extmul_high_s : ExtMulPat<extend_high_s>;
def extmul_low_u : ExtMulPat<extend_low_u>;
def extmul_high_u : ExtMulPat<extend_high_u>;

// i16x8 results from i8x16 operands
defm EXTMUL_LOW_S :
  SIMDExtBinary<I16x8, extmul_low_s, "extmul_low_i8x16_s", 0x9c>;
defm EXTMUL_HIGH_S :
  SIMDExtBinary<I16x8, extmul_high_s, "extmul_high_i8x16_s", 0x9d>;
defm EXTMUL_LOW_U :
  SIMDExtBinary<I16x8, extmul_low_u, "extmul_low_i8x16_u", 0x9e>;
defm EXTMUL_HIGH_U :
  SIMDExtBinary<I16x8, extmul_high_u, "extmul_high_i8x16_u", 0x9f>;

// i32x4 results from i16x8 operands
defm EXTMUL_LOW_S :
  SIMDExtBinary<I32x4, extmul_low_s, "extmul_low_i16x8_s", 0xbc>;
defm EXTMUL_HIGH_S :
  SIMDExtBinary<I32x4, extmul_high_s, "extmul_high_i16x8_s", 0xbd>;
defm EXTMUL_LOW_U :
  SIMDExtBinary<I32x4, extmul_low_u, "extmul_low_i16x8_u", 0xbe>;
defm EXTMUL_HIGH_U :
  SIMDExtBinary<I32x4, extmul_high_u, "extmul_high_i16x8_u", 0xbf>;

// i64x2 results from i32x4 operands
defm EXTMUL_LOW_S :
  SIMDExtBinary<I64x2, extmul_low_s, "extmul_low_i32x4_s", 0xdc>;
defm EXTMUL_HIGH_S :
  SIMDExtBinary<I64x2, extmul_high_s, "extmul_high_i32x4_s", 0xdd>;
defm EXTMUL_LOW_U :
  SIMDExtBinary<I64x2, extmul_low_u, "extmul_low_i32x4_u", 0xde>;
defm EXTMUL_HIGH_U :
  SIMDExtBinary<I64x2, extmul_high_u, "extmul_high_i32x4_u", 0xdf>;

1192//===----------------------------------------------------------------------===// 1193// Floating-point unary arithmetic 1194//===----------------------------------------------------------------------===// 1195 1196multiclass SIMDUnaryFP<SDNode node, string name, bits<32> baseInst> { 1197 defm "" : SIMDUnary<F32x4, node, name, baseInst>; 1198 defm "" : SIMDUnary<F64x2, node, name, !add(baseInst, 12)>; 1199 // Unlike F32x4 and F64x2 there's not a gap in the opcodes between "neg" and 1200 // "sqrt" so subtract one from the offset. 1201 defm "" : HalfPrecisionUnary<F16x8, node, name, 1202 !add(baseInst,!if(!eq(name, "sqrt"), 80, 81))>; 1203} 1204 1205// Absolute value: abs 1206defm ABS : SIMDUnaryFP<fabs, "abs", 224>; 1207 1208// Negation: neg 1209defm NEG : SIMDUnaryFP<fneg, "neg", 225>; 1210 1211// Square root: sqrt 1212defm SQRT : SIMDUnaryFP<fsqrt, "sqrt", 227>; 1213 1214// Rounding: ceil, floor, trunc, nearest 1215defm CEIL : SIMDUnary<F32x4, fceil, "ceil", 0x67>; 1216defm FLOOR : SIMDUnary<F32x4, ffloor, "floor", 0x68>; 1217defm TRUNC: SIMDUnary<F32x4, ftrunc, "trunc", 0x69>; 1218defm NEAREST: SIMDUnary<F32x4, fnearbyint, "nearest", 0x6a>; 1219defm CEIL : SIMDUnary<F64x2, fceil, "ceil", 0x74>; 1220defm FLOOR : SIMDUnary<F64x2, ffloor, "floor", 0x75>; 1221defm TRUNC: SIMDUnary<F64x2, ftrunc, "trunc", 0x7a>; 1222defm NEAREST: SIMDUnary<F64x2, fnearbyint, "nearest", 0x94>; 1223defm CEIL : HalfPrecisionUnary<F16x8, fceil, "ceil", 0x13c>; 1224defm FLOOR : HalfPrecisionUnary<F16x8, ffloor, "floor", 0x13d>; 1225defm TRUNC : HalfPrecisionUnary<F16x8, ftrunc, "trunc", 0x13e>; 1226defm NEAREST : HalfPrecisionUnary<F16x8, fnearbyint, "nearest", 0x13f>; 1227 1228// WebAssembly doesn't expose inexact exceptions, so map frint to fnearbyint. 
def : Pat<(v4f32 (frint (v4f32 V128:$src))),
          (NEAREST_F32x4 V128:$src)>;
def : Pat<(v2f64 (frint (v2f64 V128:$src))),
          (NEAREST_F64x2 V128:$src)>;
def : Pat<(v8f16 (frint (v8f16 V128:$src))),
          (NEAREST_F16x8 V128:$src)>;

// WebAssembly always rounds ties-to-even, so map froundeven to fnearbyint.
def : Pat<(v4f32 (froundeven (v4f32 V128:$src))),
          (NEAREST_F32x4 V128:$src)>;
def : Pat<(v2f64 (froundeven (v2f64 V128:$src))),
          (NEAREST_F64x2 V128:$src)>;
def : Pat<(v8f16 (froundeven (v8f16 V128:$src))),
          (NEAREST_F16x8 V128:$src)>;

//===----------------------------------------------------------------------===//
// Floating-point binary arithmetic
//===----------------------------------------------------------------------===//

// Instantiates a binary operation for F32x4, F64x2 and (under
// HasHalfPrecision) F16x8, at opcodes `baseInst`, `baseInst` + 12 and
// `baseInst` + 80 respectively.
multiclass SIMDBinaryFP<SDPatternOperator node, string name,
                        bits<32> baseInst> {
  defm "" : SIMDBinary<F32x4, node, name, baseInst>;
  defm "" : SIMDBinary<F64x2, node, name, !add(baseInst, 12)>;
  defm "" : HalfPrecisionBinary<F16x8, node, name, !add(baseInst, 80)>;
}

// Addition: add
let isCommutable = 1 in
defm ADD : SIMDBinaryFP<fadd, "add", 228>;

// Subtraction: sub
defm SUB : SIMDBinaryFP<fsub, "sub", 229>;

// Multiplication: mul
let isCommutable = 1 in
defm MUL : SIMDBinaryFP<fmul, "mul", 230>;

// Division: div
defm DIV : SIMDBinaryFP<fdiv, "div", 231>;

// NaN-propagating minimum: min
defm MIN : SIMDBinaryFP<fminimum, "min", 232>;

// NaN-propagating maximum: max
defm MAX : SIMDBinaryFP<fmaximum, "max", 233>;

// Pseudo-minimum: pmin. Matches any of four equivalent compare-and-select
// spellings that pick $rhs when it orders before $lhs.
def pmin : PatFrags<(ops node:$lhs, node:$rhs), [
  (vselect (setolt $rhs, $lhs), $rhs, $lhs),
  (vselect (setole $rhs, $lhs), $rhs, $lhs),
  (vselect (setogt $lhs, $rhs), $rhs, $lhs),
  (vselect (setoge $lhs, $rhs), $rhs, $lhs)
]>;
defm PMIN : SIMDBinaryFP<pmin, "pmin", 234>;

// Pseudo-maximum: pmax. The mirror image of pmin: picks $rhs when it orders
// after $lhs.
def pmax : PatFrags<(ops node:$lhs, node:$rhs), [
  (vselect (setogt $rhs, $lhs), $rhs, $lhs),
  (vselect (setoge $rhs, $lhs), $rhs, $lhs),
  (vselect (setolt $lhs, $rhs), $rhs, $lhs),
  (vselect (setole $lhs, $rhs), $rhs, $lhs)
]>;
defm PMAX : SIMDBinaryFP<pmax, "pmax", 235>;

// Also match the pmin/pmax cases where the operands are int vectors (but the
// comparison is still a floating point comparison). This can happen when using
// the wasm_simd128.h intrinsics because v128_t is an integer vector.
foreach vec = [F32x4, F64x2, F16x8] in {
defvar pmin_inst = !cast<NI>("PMIN_"#vec);
defvar pmax_inst = !cast<NI>("PMAX_"#vec);
def : Pat<(vec.int_vt (vselect
            (setolt (vec.vt (bitconvert V128:$rhs)),
                    (vec.vt (bitconvert V128:$lhs))),
            V128:$rhs, V128:$lhs)),
          (pmin_inst $lhs, $rhs)>;
def : Pat<(vec.int_vt (vselect
            (setolt (vec.vt (bitconvert V128:$lhs)),
                    (vec.vt (bitconvert V128:$rhs))),
            V128:$rhs, V128:$lhs)),
          (pmax_inst $lhs, $rhs)>;
}

// And match the pmin/pmax LLVM intrinsics as well
def : Pat<(v4f32 (int_wasm_pmin (v4f32 V128:$lhs), (v4f32 V128:$rhs))),
          (PMIN_F32x4 V128:$lhs, V128:$rhs)>;
def : Pat<(v4f32 (int_wasm_pmax (v4f32 V128:$lhs), (v4f32 V128:$rhs))),
          (PMAX_F32x4 V128:$lhs, V128:$rhs)>;
def : Pat<(v2f64 (int_wasm_pmin (v2f64 V128:$lhs), (v2f64 V128:$rhs))),
          (PMIN_F64x2 V128:$lhs, V128:$rhs)>;
def : Pat<(v2f64 (int_wasm_pmax (v2f64 V128:$lhs), (v2f64 V128:$rhs))),
          (PMAX_F64x2 V128:$lhs, V128:$rhs)>;
def : Pat<(v8f16 (int_wasm_pmin (v8f16 V128:$lhs), (v8f16 V128:$rhs))),
          (PMIN_F16x8 V128:$lhs, V128:$rhs)>;
def : Pat<(v8f16 (int_wasm_pmax (v8f16 V128:$lhs), (v8f16 V128:$rhs))),
          (PMAX_F16x8 V128:$lhs, V128:$rhs)>;

//===----------------------------------------------------------------------===//
// Conversions
//===----------------------------------------------------------------------===//

// Unary conversion from `arg.vt` to `vec.vt`. The record is named
// "<op>_<vec>" and the mnemonic is "<vec.prefix>.<name>"; `reqs` lists
// predicates required in addition to the ones SIMD_I itself adds.
multiclass SIMDConvert<Vec vec, Vec arg, SDPatternOperator op, string name,
                       bits<32> simdop, list<Predicate> reqs = []> {
  defm op#_#vec :
    SIMD_I<(outs V128:$dst), (ins V128:$vec), (outs), (ins),
           [(set (vec.vt V128:$dst), (vec.vt (op (arg.vt V128:$vec))))],
           vec.prefix#"."#name#"\t$dst, $vec", vec.prefix#"."#name,
           simdop, reqs>;
}

// SIMDConvert that additionally requires HasHalfPrecision.
multiclass HalfPrecisionConvert<Vec vec, Vec arg, SDPatternOperator op,
                                string name, bits<32> simdop> {
  defm "" : SIMDConvert<vec, arg, op, name, simdop, [HasHalfPrecision]>;
}

// Floating point to integer with saturation: trunc_sat
defm "" : SIMDConvert<I32x4, F32x4, fp_to_sint, "trunc_sat_f32x4_s", 248>;
defm "" : SIMDConvert<I32x4, F32x4, fp_to_uint, "trunc_sat_f32x4_u", 249>;
defm "" : HalfPrecisionConvert<I16x8, F16x8, fp_to_sint,
                               "trunc_sat_f16x8_s", 0x148>;
defm "" : HalfPrecisionConvert<I16x8, F16x8, fp_to_uint,
                               "trunc_sat_f16x8_u", 0x149>;

// Support the explicitly saturating conversion nodes as well.
def trunc_s_sat32 : PatFrag<(ops node:$x), (fp_to_sint_sat $x, i32)>;
def trunc_u_sat32 : PatFrag<(ops node:$x), (fp_to_uint_sat $x, i32)>;
def : Pat<(v4i32 (trunc_s_sat32 (v4f32 V128:$src))),
          (fp_to_sint_I32x4 $src)>;
def : Pat<(v4i32 (trunc_u_sat32 (v4f32 V128:$src))),
          (fp_to_uint_I32x4 $src)>;

// f64x2 -> i32x4 saturating truncation ("_zero" forms), via custom nodes.
def trunc_sat_zero_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;
def trunc_sat_zero_s :
  SDNode<"WebAssemblyISD::TRUNC_SAT_ZERO_S", trunc_sat_zero_t>;
def trunc_sat_zero_u :
  SDNode<"WebAssemblyISD::TRUNC_SAT_ZERO_U", trunc_sat_zero_t>;
defm "" : SIMDConvert<I32x4, F64x2, trunc_sat_zero_s,
                      "trunc_sat_f64x2_s_zero", 0xfc>;
defm "" : SIMDConvert<I32x4, F64x2, trunc_sat_zero_u,
                      "trunc_sat_f64x2_u_zero", 0xfd>;

// Integer to floating point: convert
def convert_low_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;
def convert_low_s : SDNode<"WebAssemblyISD::CONVERT_LOW_S", convert_low_t>;
def convert_low_u : SDNode<"WebAssemblyISD::CONVERT_LOW_U", convert_low_t>;
defm "" : SIMDConvert<F32x4, I32x4, sint_to_fp, "convert_i32x4_s", 250>;
defm "" : SIMDConvert<F32x4, I32x4, uint_to_fp, "convert_i32x4_u", 251>;
defm "" : SIMDConvert<F64x2, I32x4, convert_low_s,
                      "convert_low_i32x4_s", 0xfe>;
defm "" : SIMDConvert<F64x2, I32x4, convert_low_u,
                      "convert_low_i32x4_u", 0xff>;
defm "" : HalfPrecisionConvert<F16x8, I16x8, sint_to_fp,
                               "convert_i16x8_s", 0x14a>;
defm "" : HalfPrecisionConvert<F16x8, I16x8, uint_to_fp,
                               "convert_i16x8_u", 0x14b>;

// Extending operations
// TODO: refactor this to be uniform for i64x2 if the numbering is not changed.
// Defines the four extend_{low,high}_<split shape>_{s,u} conversions that
// widen `vec.split` into `vec`, at consecutive opcodes starting at `baseInst`
// in the order low_s, high_s, low_u, high_u.
multiclass SIMDExtend<Vec vec, bits<32> baseInst> {
  defm "" : SIMDConvert<vec, vec.split, extend_low_s,
                        "extend_low_"#vec.split.prefix#"_s",
                        baseInst>;
  defm "" : SIMDConvert<vec, vec.split, extend_high_s,
                        "extend_high_"#vec.split.prefix#"_s",
                        !add(baseInst, 1)>;
  defm "" : SIMDConvert<vec, vec.split, extend_low_u,
                        "extend_low_"#vec.split.prefix#"_u",
                        !add(baseInst, 2)>;
  defm "" : SIMDConvert<vec, vec.split, extend_high_u,
                        "extend_high_"#vec.split.prefix#"_u",
                        !add(baseInst, 3)>;
}

defm "" : SIMDExtend<I16x8, 0x87>;
defm "" : SIMDExtend<I32x4, 0xa7>;
defm "" : SIMDExtend<I64x2, 0xc7>;

// Narrowing operations: two `vec` inputs are combined into one `vec.split`
// result. The signed form uses opcode `baseInst`, the unsigned form
// `baseInst` + 1.
multiclass SIMDNarrow<Vec vec, bits<32> baseInst> {
  defvar mnemonic = vec.split.prefix#".narrow_"#vec.prefix;
  defm NARROW_S_#vec.split :
    SIMD_I<(outs V128:$dst), (ins V128:$low, V128:$high), (outs), (ins),
           [(set (vec.split.vt V128:$dst),
              (vec.split.vt (int_wasm_narrow_signed
                (vec.vt V128:$low), (vec.vt V128:$high))))],
           mnemonic#"_s\t$dst, $low, $high", mnemonic#"_s", baseInst>;
  defm NARROW_U_#vec.split :
    SIMD_I<(outs V128:$dst), (ins V128:$low, V128:$high), (outs), (ins),
           [(set (vec.split.vt V128:$dst),
              (vec.split.vt (int_wasm_narrow_unsigned
                (vec.vt V128:$low), (vec.vt V128:$high))))],
           mnemonic#"_u\t$dst, $low, $high", mnemonic#"_u",
           !add(baseInst, 1)>;
}

defm "" : SIMDNarrow<I16x8, 101>;
defm "" : SIMDNarrow<I32x4, 133>;

// WebAssemblyISD::NARROW_U also selects the unsigned narrowing instructions.
def wasm_narrow_t : SDTypeProfile<1, 2, []>;
def wasm_narrow_u : SDNode<"WebAssemblyISD::NARROW_U", wasm_narrow_t>;
def : Pat<(v16i8 (wasm_narrow_u (v8i16 V128:$left), (v8i16 V128:$right))),
          (NARROW_U_I8x16 $left, $right)>;
def : Pat<(v8i16 (wasm_narrow_u (v4i32 V128:$left), (v4i32 V128:$right))),
          (NARROW_U_I16x8 $left, $right)>;

// Bitcasts are nops
// Matching bitcast t1 to t1 causes strange errors, so avoid repeating types
foreach to_vec = AllVecs in
foreach from_vec = AllVecs in
if !ne(to_vec, from_vec) then
def : Pat<(to_vec.vt (bitconvert (from_vec.vt V128:$v))),
          (to_vec.vt V128:$v)>;

// Extended pairwise addition
defm "" : SIMDConvert<I16x8, I8x16, int_wasm_extadd_pairwise_signed,
                      "extadd_pairwise_i8x16_s", 0x7c>;
defm "" : SIMDConvert<I16x8, I8x16, int_wasm_extadd_pairwise_unsigned,
                      "extadd_pairwise_i8x16_u", 0x7d>;
defm "" : SIMDConvert<I32x4, I16x8, int_wasm_extadd_pairwise_signed,
                      "extadd_pairwise_i16x8_s", 0x7e>;
defm "" : SIMDConvert<I32x4, I16x8, int_wasm_extadd_pairwise_unsigned,
                      "extadd_pairwise_i16x8_u", 0x7f>;

// f64x2 <-> f32x4 conversions
def demote_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;
def demote_zero : SDNode<"WebAssemblyISD::DEMOTE_ZERO", demote_t>;
defm "" : SIMDConvert<F32x4, F64x2, demote_zero,
                      "demote_f64x2_zero", 0x5e>;

def promote_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;
def promote_low : SDNode<"WebAssemblyISD::PROMOTE_LOW", promote_t>;
defm "" : SIMDConvert<F64x2, F32x4, promote_low,
                      "promote_low_f32x4", 0x5f>;

// Lower extending loads to load64_zero + promote_low
def extloadv2f32 : PatFrag<(ops node:$ptr), (extload node:$ptr)> {
  let MemoryVT = v2f32;
}
// Adapted from the body of LoadPatNoOffset
// TODO: other addressing patterns
// 32-bit addresses
def : Pat<(v2f64 (extloadv2f32 (i32 I32:$addr))),
          (promote_low_F64x2 (LOAD_ZERO_I64x2_A32 0, 0, I32:$addr))>,
      Requires<[HasAddr32]>;
// 64-bit addresses
def : Pat<(v2f64 (extloadv2f32 (i64 I64:$addr))),
          (promote_low_F64x2 (LOAD_ZERO_I64x2_A64 0, 0, I64:$addr))>,
      Requires<[HasAddr64]>;

//===----------------------------------------------------------------------===//
// Saturating Rounding Q-Format Multiplication
//===----------------------------------------------------------------------===//

defm Q15MULR_SAT_S :
  SIMDBinary<I16x8, int_wasm_q15mulr_sat_signed, "q15mulr_sat_s", 0x82>;

//===----------------------------------------------------------------------===//
// Relaxed swizzle
//===----------------------------------------------------------------------===//

defm RELAXED_SWIZZLE :
  RELAXED_I<(outs V128:$dst), (ins V128:$src, V128:$mask), (outs), (ins),
            [(set (v16i8 V128:$dst),
               (int_wasm_relaxed_swizzle
                 (v16i8 V128:$src), (v16i8 V128:$mask)))],
            "i8x16.relaxed_swizzle\t$dst, $src, $mask",
            "i8x16.relaxed_swizzle", 0x100>;

//===----------------------------------------------------------------------===//
// Relaxed floating-point to int conversions
//===----------------------------------------------------------------------===//

// Unary conversion from `arg.vt` to `vec.vt`, built on RELAXED_I. The record
// is named "<op>_<vec>" and the mnemonic is "<vec.prefix>.<name>".
multiclass RelaxedConvert<Vec vec, Vec arg, SDPatternOperator op, string name,
                          bits<32> simdop> {
  defm op#_#vec :
    RELAXED_I<(outs V128:$dst), (ins V128:$vec), (outs), (ins),
              [(set (vec.vt V128:$dst), (vec.vt (op (arg.vt V128:$vec))))],
              vec.prefix#"."#name#"\t$dst, $vec", vec.prefix#"."#name,
              simdop>;
}

defm "" : RelaxedConvert<I32x4, F32x4, int_wasm_relaxed_trunc_signed,
                         "relaxed_trunc_f32x4_s", 0x101>;
defm "" : RelaxedConvert<I32x4, F32x4, int_wasm_relaxed_trunc_unsigned,
                         "relaxed_trunc_f32x4_u", 0x102>;
defm "" : RelaxedConvert<I32x4, F64x2, int_wasm_relaxed_trunc_signed_zero,
                         "relaxed_trunc_f64x2_s_zero", 0x103>;
defm "" : RelaxedConvert<I32x4, F64x2, int_wasm_relaxed_trunc_unsigned_zero,
                         "relaxed_trunc_f64x2_u_zero", 0x104>;

//===----------------------------------------------------------------------===//
// Relaxed (Negative) Multiply-Add (madd/nmadd)
//===----------------------------------------------------------------------===//

// Defines MADD_<vec> (opcode `simdopA`) and NMADD_<vec> (opcode `simdopS`),
// both three-operand instructions on `vec.vt`. They are built on SIMD_I, with
// the caller-supplied `reqs` as the extra predicates.
multiclass SIMDMADD<Vec vec, bits<32> simdopA, bits<32> simdopS,
                    list<Predicate> reqs> {
  defm MADD_#vec :
    SIMD_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c),
           (outs), (ins),
           [(set (vec.vt V128:$dst),
              (int_wasm_relaxed_madd
                (vec.vt V128:$a), (vec.vt V128:$b), (vec.vt V128:$c)))],
           vec.prefix#".relaxed_madd\t$dst, $a, $b, $c",
           vec.prefix#".relaxed_madd", simdopA, reqs>;
  defm NMADD_#vec :
    SIMD_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c),
           (outs), (ins),
           [(set (vec.vt V128:$dst),
              (int_wasm_relaxed_nmadd
                (vec.vt V128:$a), (vec.vt V128:$b), (vec.vt V128:$c)))],
           vec.prefix#".relaxed_nmadd\t$dst, $a, $b, $c",
           vec.prefix#".relaxed_nmadd", simdopS, reqs>;
}

defm "" : SIMDMADD<F32x4, 0x105, 0x106, [HasRelaxedSIMD]>;
defm "" : SIMDMADD<F64x2, 0x107, 0x108, [HasRelaxedSIMD]>;
defm "" : SIMDMADD<F16x8, 0x146, 0x147, [HasHalfPrecision]>;

//===----------------------------------------------------------------------===//
// Laneselect
//===----------------------------------------------------------------------===//

// Defines LANESELECT_<vec>, a three-operand relaxed instruction on `vec.vt`.
multiclass SIMDLANESELECT<Vec vec, bits<32> op> {
  defm LANESELECT_#vec :
    RELAXED_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c),
              (outs), (ins),
              [(set (vec.vt V128:$dst),
                 (int_wasm_relaxed_laneselect
                   (vec.vt V128:$a), (vec.vt V128:$b), (vec.vt V128:$c)))],
              vec.prefix#".relaxed_laneselect\t$dst, $a, $b, $c",
              vec.prefix#".relaxed_laneselect", op>;
}

defm "" : SIMDLANESELECT<I8x16, 0x109>;
defm "" : SIMDLANESELECT<I16x8, 0x10a>;
defm "" : SIMDLANESELECT<I32x4, 0x10b>;
defm "" : SIMDLANESELECT<I64x2, 0x10c>;

//===----------------------------------------------------------------------===//
// Relaxed floating-point min and max.
1531//===----------------------------------------------------------------------===// 1532 1533multiclass RelaxedBinary<Vec vec, SDPatternOperator node, string name, 1534 bits<32> simdop> { 1535 defm _#vec : RELAXED_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), 1536 (outs), (ins), 1537 [(set (vec.vt V128:$dst), 1538 (node (vec.vt V128:$lhs), (vec.vt V128:$rhs)))], 1539 vec.prefix#"."#name#"\t$dst, $lhs, $rhs", 1540 vec.prefix#"."#name, simdop>; 1541} 1542 1543defm SIMD_RELAXED_FMIN : 1544 RelaxedBinary<F32x4, int_wasm_relaxed_min, "relaxed_min", 0x10d>; 1545defm SIMD_RELAXED_FMAX : 1546 RelaxedBinary<F32x4, int_wasm_relaxed_max, "relaxed_max", 0x10e>; 1547defm SIMD_RELAXED_FMIN : 1548 RelaxedBinary<F64x2, int_wasm_relaxed_min, "relaxed_min", 0x10f>; 1549defm SIMD_RELAXED_FMAX : 1550 RelaxedBinary<F64x2, int_wasm_relaxed_max, "relaxed_max", 0x110>; 1551 1552//===----------------------------------------------------------------------===// 1553// Relaxed rounding q15 multiplication 1554//===----------------------------------------------------------------------===// 1555 1556defm RELAXED_Q15MULR_S : 1557 RelaxedBinary<I16x8, int_wasm_relaxed_q15mulr_signed, "relaxed_q15mulr_s", 1558 0x111>; 1559 1560//===----------------------------------------------------------------------===// 1561// Relaxed integer dot product 1562//===----------------------------------------------------------------------===// 1563 1564defm RELAXED_DOT : 1565 RELAXED_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), (outs), (ins), 1566 [(set (v8i16 V128:$dst), (int_wasm_relaxed_dot_i8x16_i7x16_signed 1567 (v16i8 V128:$lhs), (v16i8 V128:$rhs)))], 1568 "i16x8.relaxed_dot_i8x16_i7x16_s\t$dst, $lhs, $rhs", 1569 "i16x8.relaxed_dot_i8x16_i7x16_s", 0x112>; 1570 1571defm RELAXED_DOT_ADD : 1572 RELAXED_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs, V128:$acc), 1573 (outs), (ins), 1574 [(set (v4i32 V128:$dst), (int_wasm_relaxed_dot_i8x16_i7x16_add_signed 1575 (v16i8 V128:$lhs), (v16i8 V128:$rhs), (v4i32 
V128:$acc)))], 1576 "i32x4.relaxed_dot_i8x16_i7x16_add_s\t$dst, $lhs, $rhs, $acc", 1577 "i32x4.relaxed_dot_i8x16_i7x16_add_s", 0x113>; 1578 1579//===----------------------------------------------------------------------===// 1580// Relaxed BFloat16 dot product 1581//===----------------------------------------------------------------------===// 1582 1583defm RELAXED_DOT_BFLOAT : 1584 RELAXED_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs, V128:$acc), 1585 (outs), (ins), 1586 [(set (v4f32 V128:$dst), (int_wasm_relaxed_dot_bf16x8_add_f32 1587 (v8i16 V128:$lhs), (v8i16 V128:$rhs), (v4f32 V128:$acc)))], 1588 "f32x4.relaxed_dot_bf16x8_add_f32\t$dst, $lhs, $rhs, $acc", 1589 "f32x4.relaxed_dot_bf16x8_add_f32", 0x114>; 1590