//===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 AVX512 instruction set, defining the
// instructions and the properties of those instructions that are needed for
// code generation, machine code emission, and analysis.
//
//===----------------------------------------------------------------------===//

// Group template arguments that can be derived from the vector type (EltNum x
// EltVT). These are things like the register class for the writemask, etc.
// The idea is to pass one of these as the template argument rather than the
// individual arguments.
// The template is also used for scalar types, in which case NumElts is 1.
class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
                      string suffix = ""> {
  RegisterClass RC = rc;
  ValueType EltVT = eltvt;
  int NumElts = numelts;

  // Corresponding mask register class.
  RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);

  // Corresponding mask register pair class.
  RegisterOperand KRPC = !if (!gt(NumElts, 16), ?,
                              !cast<RegisterOperand>("VK" # NumElts # "Pair"));

  // Corresponding write-mask register class.
  RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");

  // The mask VT.
  ValueType KVT = !cast<ValueType>("v" # NumElts # "i1");

  // Suffix used in the instruction mnemonic.
  string Suffix = suffix;

  // VTName is a string name for the vector VT. For vector types it will be
  // v # NumElts # EltVT, so for a vector of 8 i32 elements it will be v8i32.
  // It is slightly more complex for scalar types, where NumElts = 1: in that
  // case we build v4f32 or v2f64.
  string VTName = "v" # !if (!eq (NumElts, 1),
                        !if (!eq (EltVT.Size, 16), 8,
                        !if (!eq (EltVT.Size, 32), 4,
                        !if (!eq (EltVT.Size, 64), 2, NumElts))), NumElts) # EltVT;

  // The vector VT.
  ValueType VT = !cast<ValueType>(VTName);

  string EltTypeName = !cast<string>(EltVT);
  // Size of the element type in bits, e.g. 32 for v16i32.
  string EltSizeName = !subst("i", "", !subst("f", "", !subst("b", "", EltTypeName)));
  int EltSize = EltVT.Size;

  // "i" for integer types and "f" for floating-point types
  string TypeVariantName = !subst("b", "", !subst(EltSizeName, "", EltTypeName));

  // Size of RC in bits, e.g. 512 for VR512.
  int Size = VT.Size;

  // The corresponding memory operand, e.g. i512mem for VR512.
  X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
  X86MemOperand ScalarMemOp = !cast<X86MemOperand>(!subst("b", "", EltTypeName) # "mem");
  // FP scalar memory operand for intrinsics - ssmem/sdmem.
  Operand IntScalarMemOp = !if (!eq (EltTypeName, "f16"), !cast<Operand>("shmem"),
                           !if (!eq (EltTypeName, "bf16"), !cast<Operand>("shmem"),
                           !if (!eq (EltTypeName, "f32"), !cast<Operand>("ssmem"),
                           !if (!eq (EltTypeName, "f64"), !cast<Operand>("sdmem"), ?))));

  // Load patterns
  PatFrag LdFrag = !cast<PatFrag>("load" # VTName);

  PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" # VTName);

  PatFrag ScalarLdFrag = !cast<PatFrag>("load" # !subst("b", "", EltTypeName));
  PatFrag BroadcastLdFrag = !cast<PatFrag>("X86VBroadcastld" # EltSizeName);

  PatFrags ScalarIntMemFrags = !if (!eq (EltTypeName, "f16"), !cast<PatFrags>("sse_load_f16"),
                               !if (!eq (EltTypeName, "bf16"), !cast<PatFrags>("sse_load_f16"),
                               !if (!eq (EltTypeName, "f32"), !cast<PatFrags>("sse_load_f32"),
                               !if (!eq (EltTypeName, "f64"), !cast<PatFrags>("sse_load_f64"), ?))));

  // The string to specify embedded broadcast in assembly.
  string BroadcastStr = "{1to" # NumElts # "}";

  // 8-bit compressed displacement tuple/subvector format. This is only
  // defined for NumElts <= 8.
  CD8VForm CD8TupleForm = !if (!eq (!srl(NumElts, 4), 0),
                               !cast<CD8VForm>("CD8VT" # NumElts), ?);

  SubRegIndex SubRegIdx = !if (!eq (Size, 128), sub_xmm,
                          !if (!eq (Size, 256), sub_ymm, ?));

  Domain ExeDomain = !if (!eq (EltTypeName, "f32"), SSEPackedSingle,
                     !if (!eq (EltTypeName, "f64"), SSEPackedDouble,
                     !if (!eq (EltTypeName, "f16"), SSEPackedSingle, // FIXME?
                     !if (!eq (EltTypeName, "bf16"), SSEPackedSingle, // FIXME?
                     SSEPackedInt))));

  RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X,
                      !if (!eq (EltTypeName, "f16"), FR16X,
                      !if (!eq (EltTypeName, "bf16"), FR16X,
                      FR64X)));

  dag ImmAllZerosV = (VT immAllZerosV);

  string ZSuffix = !if (!eq (Size, 128), "Z128",
                   !if (!eq (Size, 256), "Z256", "Z"));
}

def v64i8_info  : X86VectorVTInfo<64,  i8, VR512, "b">;
def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">;
def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
def v8i64_info  : X86VectorVTInfo<8,  i64, VR512, "q">;
def v32f16_info : X86VectorVTInfo<32, f16, VR512, "ph">;
def v32bf16_info: X86VectorVTInfo<32, bf16, VR512, "pbf">;
def v16f32_info : X86VectorVTInfo<16, f32, VR512, "ps">;
def v8f64_info  : X86VectorVTInfo<8,  f64, VR512, "pd">;

// "x" in v32i8x_info means RC = VR256X
def v32i8x_info  : X86VectorVTInfo<32,  i8, VR256X, "b">;
def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
def v8i32x_info  : X86VectorVTInfo<8,  i32, VR256X, "d">;
def v4i64x_info  : X86VectorVTInfo<4,  i64, VR256X, "q">;
def v16f16x_info : X86VectorVTInfo<16, f16, VR256X, "ph">;
def v16bf16x_info: X86VectorVTInfo<16, bf16, VR256X, "pbf">;
def v8f32x_info  : X86VectorVTInfo<8,  f32, VR256X, "ps">;
def v4f64x_info  : X86VectorVTInfo<4,  f64, VR256X, "pd">;

def v16i8x_info  : X86VectorVTInfo<16,  i8, VR128X, "b">;
def v8i16x_info  : X86VectorVTInfo<8,  i16, VR128X, "w">;
def v4i32x_info  : X86VectorVTInfo<4,  i32, VR128X, "d">;
def v2i64x_info  : X86VectorVTInfo<2,  i64, VR128X, "q">;
def v8f16x_info  : X86VectorVTInfo<8,  f16, VR128X, "ph">;
def v8bf16x_info : X86VectorVTInfo<8,  bf16, VR128X, "pbf">;
def v4f32x_info  : X86VectorVTInfo<4,  f32, VR128X, "ps">;
def v2f64x_info  : X86VectorVTInfo<2,  f64, VR128X, "pd">;

// We map scalar types to the smallest (128-bit) vector type with the
// appropriate element type. This allows us to use the same masking logic.
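// A worked example of the scalar mapping above (derived from X86VectorVTInfo,
// not a new definition): f32x_info has NumElts = 1 and EltVT.Size = 32, so
// VTName becomes "v4f32" and VT is v4f32. The scalar lives in the low element
// of an XMM register, and the usual VK1 write-mask classes apply.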
def i32x_info  : X86VectorVTInfo<1, i32, GR32, "si">;
def i64x_info  : X86VectorVTInfo<1, i64, GR64, "sq">;
def f16x_info  : X86VectorVTInfo<1, f16, VR128X, "sh">;
def bf16x_info : X86VectorVTInfo<1, bf16, VR128X, "sbf">;
def f32x_info  : X86VectorVTInfo<1, f32, VR128X, "ss">;
def f64x_info  : X86VectorVTInfo<1, f64, VR128X, "sd">;

class AVX512VLVectorVTInfo<X86VectorVTInfo i512, X86VectorVTInfo i256,
                           X86VectorVTInfo i128> {
  X86VectorVTInfo info512 = i512;
  X86VectorVTInfo info256 = i256;
  X86VectorVTInfo info128 = i128;
}

def avx512vl_i8_info   : AVX512VLVectorVTInfo<v64i8_info, v32i8x_info,
                                              v16i8x_info>;
def avx512vl_i16_info  : AVX512VLVectorVTInfo<v32i16_info, v16i16x_info,
                                              v8i16x_info>;
def avx512vl_i32_info  : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info,
                                              v4i32x_info>;
def avx512vl_i64_info  : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info,
                                              v2i64x_info>;
def avx512vl_f16_info  : AVX512VLVectorVTInfo<v32f16_info, v16f16x_info,
                                              v8f16x_info>;
def avx512vl_bf16_info : AVX512VLVectorVTInfo<v32bf16_info, v16bf16x_info,
                                              v8bf16x_info>;
def avx512vl_f32_info  : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info,
                                              v4f32x_info>;
def avx512vl_f64_info  : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info,
                                              v2f64x_info>;

class X86KVectorVTInfo<RegisterClass _krc, RegisterClass _krcwm,
                       ValueType _vt> {
  RegisterClass KRC = _krc;
  RegisterClass KRCWM = _krcwm;
  ValueType KVT = _vt;
}

def v1i1_info  : X86KVectorVTInfo<VK1,  VK1WM,  v1i1>;
def v2i1_info  : X86KVectorVTInfo<VK2,  VK2WM,  v2i1>;
def v4i1_info  : X86KVectorVTInfo<VK4,  VK4WM,  v4i1>;
def v8i1_info  : X86KVectorVTInfo<VK8,  VK8WM,  v8i1>;
def v16i1_info : X86KVectorVTInfo<VK16, VK16WM, v16i1>;
def v32i1_info : X86KVectorVTInfo<VK32, VK32WM, v32i1>;
def v64i1_info : X86KVectorVTInfo<VK64, VK64WM, v64i1>;

// Used for matching masked operations. Ensures the operation part only has a
// single use.
def vselect_mask : PatFrag<(ops node:$mask, node:$src1, node:$src2),
                           (vselect node:$mask, node:$src1, node:$src2), [{
  return isProfitableToFormMaskedOp(N);
}]>;

def X86selects_mask : PatFrag<(ops node:$mask, node:$src1, node:$src2),
                              (X86selects node:$mask, node:$src1, node:$src2), [{
  return isProfitableToFormMaskedOp(N);
}]>;

// This multiclass generates the masking variants from the non-masking
// variant. It only provides the assembly pieces for the masking variants.
// It assumes custom ISel patterns for masking which can be provided as
// template arguments.
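// For a record FOO produced by this multiclass, the three variants look
// roughly like this (illustrative AT&T syntax, not literal output):
//   FOO     foo ..., %dst              ; unmasked
//   FOOk    foo ..., %dst {%k1}        ; merge-masking (EVEX_K)
//   FOOkz   foo ..., %dst {%k1} {z}    ; zero-masking (EVEX_KZ)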
multiclass AVX512_maskable_custom<bits<8> O, Format F,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern,
                                  list<dag> MaskingPattern,
                                  list<dag> ZeroMaskingPattern,
                                  string MaskingConstraint = "",
                                  bit IsCommutable = 0,
                                  bit IsKCommutable = 0,
                                  bit IsKZCommutable = IsCommutable,
                                  string ClobberConstraint = ""> {
  let isCommutable = IsCommutable, Constraints = ClobberConstraint in
    def NAME: AVX512<O, F, Outs, Ins,
                     OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
                               "$dst, "#IntelSrcAsm#"}",
                     Pattern>;

  // Prefer over VMOV*rrk Pat<>
  let isCommutable = IsKCommutable in
    def NAME#k: AVX512<O, F, Outs, MaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
                                 "$dst {${mask}}, "#IntelSrcAsm#"}",
                       MaskingPattern>,
                EVEX_K {
      // In case of the 3src subclass this is overridden with a let.
      string Constraints = !if(!eq(ClobberConstraint, ""), MaskingConstraint,
                               !if(!eq(MaskingConstraint, ""), ClobberConstraint,
                                   !strconcat(ClobberConstraint, ", ", MaskingConstraint)));
    }

  // Zero-masking does not add any restrictions to the operand-commuting
  // transformation, so it is OK to use IsCommutable instead of IsKCommutable.
  let isCommutable = IsKZCommutable, // Prefer over VMOV*rrkz Pat<>
      Constraints = ClobberConstraint in
    def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
                        OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
                                  "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
                        ZeroMaskingPattern>,
                 EVEX_KZ;
}


// Common base class of AVX512_maskable and AVX512_maskable_3src.
multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS, dag MaskingRHS,
                                  SDPatternOperator Select = vselect_mask,
                                  string MaskingConstraint = "",
                                  bit IsCommutable = 0,
                                  bit IsKCommutable = 0,
                                  bit IsKZCommutable = IsCommutable,
                                  string ClobberConstraint = ""> :
  AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
                         AttSrcAsm, IntelSrcAsm,
                         [(set _.RC:$dst, RHS)],
                         [(set _.RC:$dst, MaskingRHS)],
                         [(set _.RC:$dst,
                               (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
                         MaskingConstraint, IsCommutable,
                         IsKCommutable, IsKZCommutable, ClobberConstraint>;

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction. In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
// This version uses a separate dag for non-masking and masking.
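// For example, the vinsert_for_size_split multiclasses later in this file pass
// null_frag for the unmasked dag and a real pattern operator for the masked
// one, so only the masked forms get ISel patterns.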
multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
                                 dag Outs, dag Ins, string OpcodeStr,
                                 string AttSrcAsm, string IntelSrcAsm,
                                 dag RHS, dag MaskRHS,
                                 string ClobberConstraint = "",
                                 bit IsCommutable = 0, bit IsKCommutable = 0,
                                 bit IsKZCommutable = IsCommutable> :
  AVX512_maskable_custom<O, F, Outs, Ins,
                         !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                         !con((ins _.KRCWM:$mask), Ins),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm,
                         [(set _.RC:$dst, RHS)],
                         [(set _.RC:$dst,
                               (vselect_mask _.KRCWM:$mask, MaskRHS, _.RC:$src0))],
                         [(set _.RC:$dst,
                               (vselect_mask _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))],
                         "$src0 = $dst", IsCommutable, IsKCommutable,
                         IsKZCommutable, ClobberConstraint>;

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction. In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS,
                           bit IsCommutable = 0, bit IsKCommutable = 0,
                           bit IsKZCommutable = IsCommutable,
                           SDPatternOperator Select = vselect_mask,
                           string ClobberConstraint = ""> :
  AVX512_maskable_common<O, F, _, Outs, Ins,
                         !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                         !con((ins _.KRCWM:$mask), Ins),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                         (Select _.KRCWM:$mask, RHS, _.RC:$src0),
                         Select, "$src0 = $dst", IsCommutable, IsKCommutable,
                         IsKZCommutable, ClobberConstraint>;

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the scalar instruction.
multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs, dag Ins, string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS> :
  AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm,
                  RHS, 0, 0, 0, X86selects_mask>;

// Similar to AVX512_maskable but in this case one of the source operands
// ($src1) is already tied to $dst so we just use that for the preserved
// vector elements. NOTE that the NonTiedIns (the ins dag) should exclude
// $src1.
multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
                                dag Outs, dag NonTiedIns, string OpcodeStr,
                                string AttSrcAsm, string IntelSrcAsm,
                                dag RHS,
                                bit IsCommutable = 0,
                                bit IsKCommutable = 0,
                                SDPatternOperator Select = vselect_mask,
                                bit MaskOnly = 0> :
  AVX512_maskable_common<O, F, _, Outs,
                         !con((ins _.RC:$src1), NonTiedIns),
                         !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                         !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm,
                         !if(MaskOnly, (null_frag), RHS),
                         (Select _.KRCWM:$mask, RHS, _.RC:$src1),
                         Select, "", IsCommutable, IsKCommutable>;

// Similar to AVX512_maskable_3src but in this case the input VT for the tied
// operand differs from the output VT. This requires a bitconvert on
// the preserved vector going into the vselect.
// NOTE: The unmasked pattern is disabled.
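// Note for the multiclass below: the unmasked RHS slot is (null_frag), so only
// the masked select patterns are emitted; the bitconvert reinterprets the tied
// InVT source as the output type so the vselect_mask types line up.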
multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
                                     X86VectorVTInfo InVT,
                                     dag Outs, dag NonTiedIns, string OpcodeStr,
                                     string AttSrcAsm, string IntelSrcAsm,
                                     dag RHS, bit IsCommutable = 0> :
  AVX512_maskable_common<O, F, OutVT, Outs,
                         !con((ins InVT.RC:$src1), NonTiedIns),
                         !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
                         !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm, (null_frag),
                         (vselect_mask InVT.KRCWM:$mask, RHS,
                                       (bitconvert InVT.RC:$src1)),
                         vselect_mask, "", IsCommutable>;

multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                                       dag Outs, dag NonTiedIns, string OpcodeStr,
                                       string AttSrcAsm, string IntelSrcAsm,
                                       dag RHS,
                                       bit IsCommutable = 0,
                                       bit IsKCommutable = 0,
                                       bit MaskOnly = 0> :
  AVX512_maskable_3src<O, F, _, Outs, NonTiedIns, OpcodeStr, AttSrcAsm,
                       IntelSrcAsm, RHS, IsCommutable, IsKCommutable,
                       X86selects_mask, MaskOnly>;

multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs, dag Ins,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern> :
  AVX512_maskable_custom<O, F, Outs, Ins,
                         !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                         !con((ins _.KRCWM:$mask), Ins),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
                         "$src0 = $dst">;

multiclass AVX512_maskable_3src_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
                                       dag Outs, dag NonTiedIns,
                                       string OpcodeStr,
                                       string AttSrcAsm, string IntelSrcAsm,
                                       list<dag> Pattern> :
  AVX512_maskable_custom<O, F, Outs,
                         !con((ins _.RC:$src1), NonTiedIns),
                         !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                         !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
                         "">;

// Instructions with a mask that put their result in a mask register, like
// "compare" and "vptest".
multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
                                      dag Outs,
                                      dag Ins, dag MaskingIns,
                                      string OpcodeStr,
                                      string AttSrcAsm, string IntelSrcAsm,
                                      list<dag> Pattern,
                                      list<dag> MaskingPattern,
                                      bit IsCommutable = 0> {
  let isCommutable = IsCommutable in {
    def NAME: AVX512<O, F, Outs, Ins,
                     OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
                               "$dst, "#IntelSrcAsm#"}",
                     Pattern>;

    def NAME#k: AVX512<O, F, Outs, MaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
                                 "$dst {${mask}}, "#IntelSrcAsm#"}",
                       MaskingPattern>, EVEX_K;
  }
}

multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
                                      dag Outs,
                                      dag Ins, dag MaskingIns,
                                      string OpcodeStr,
                                      string AttSrcAsm, string IntelSrcAsm,
                                      dag RHS, dag MaskingRHS,
                                      bit IsCommutable = 0> :
  AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
                             AttSrcAsm, IntelSrcAsm,
                             [(set _.KRC:$dst, RHS)],
                             [(set _.KRC:$dst, MaskingRHS)], IsCommutable>;

multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
                               dag Outs, dag Ins, string OpcodeStr,
                               string AttSrcAsm, string IntelSrcAsm,
                               dag RHS, dag RHS_su, bit IsCommutable = 0> :
  AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
                             !con((ins _.KRCWM:$mask), Ins),
                             OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                             (and _.KRCWM:$mask, RHS_su), IsCommutable>;

// Used by conversion instructions.
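// Unlike AVX512_maskable_common, the caller supplies all three RHS dags fully
// formed; conversions often change the element count, so the masked select
// cannot be wrapped around a single RHS uniformly here.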
multiclass AVX512_maskable_cvt<bits<8> O, Format F, X86VectorVTInfo _,
                               dag Outs,
                               dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                               string OpcodeStr,
                               string AttSrcAsm, string IntelSrcAsm,
                               dag RHS, dag MaskingRHS, dag ZeroMaskingRHS> :
  AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
                         AttSrcAsm, IntelSrcAsm,
                         [(set _.RC:$dst, RHS)],
                         [(set _.RC:$dst, MaskingRHS)],
                         [(set _.RC:$dst, ZeroMaskingRHS)],
                         "$src0 = $dst">;

multiclass AVX512_maskable_fma<bits<8> O, Format F, X86VectorVTInfo _,
                               dag Outs, dag NonTiedIns, string OpcodeStr,
                               string AttSrcAsm, string IntelSrcAsm,
                               dag RHS, dag MaskingRHS, bit IsCommutable,
                               bit IsKCommutable> :
  AVX512_maskable_custom<O, F, Outs,
                         !con((ins _.RC:$src1), NonTiedIns),
                         !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                         !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm,
                         [(set _.RC:$dst, RHS)],
                         [(set _.RC:$dst,
                               (vselect_mask _.KRCWM:$mask, MaskingRHS, _.RC:$src1))],
                         [(set _.RC:$dst,
                               (vselect_mask _.KRCWM:$mask, MaskingRHS, _.ImmAllZerosV))],
                         "", IsCommutable, IsKCommutable>;

// Alias instruction that maps a zero vector to pxor / xorp* for AVX-512.
// This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
// swizzled by ExecutionDomainFix to pxor.
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
                        [(set VR512:$dst, (v16i32 immAllZerosV))]>;
def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
                              [(set VR512:$dst, (v16i32 immAllOnesV))]>;
}

let Predicates = [HasAVX512] in {
def : Pat<(v64i8 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v32i16 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v8i64 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v32f16 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v16f32 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>;
}

// Alias instructions that allow VPTERNLOG to be used with a mask to create
// a mix of all-ones and all-zeros elements. This is done as a pseudo to force
// the same register to be used as input for all three sources.
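// Illustrative post-RA expansion (a sketch, not guaranteed to be the exact
// lowering): a masked all-ones pseudo can become
//   vpternlogd $0xff, %zmm0, %zmm0, %zmm0 {%k1} {z}
// which writes all-ones lanes where the mask is set and zeros elsewhere, with
// all three ternary-logic sources reading the same register.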
let isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteVecALU] in {
def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst),
                                (ins VK16WM:$mask), "",
                                [(set VR512:$dst, (vselect (v16i1 VK16WM:$mask),
                                                           (v16i32 immAllOnesV),
                                                           (v16i32 immAllZerosV)))]>;
def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
                                (ins VK8WM:$mask), "",
                                [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
                                                           (v8i64 immAllOnesV),
                                                           (v8i64 immAllZerosV)))]>;
}

let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
                        [(set VR128X:$dst, (v4i32 immAllZerosV))]>;
def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
                        [(set VR256X:$dst, (v8i32 immAllZerosV))]>;
}

let Predicates = [HasAVX512] in {
def : Pat<(v8i16 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v16i8 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v2i64 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v8f16 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v4f32 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v2f64 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v32i8 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v16i16 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v4i64 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v16f16 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v8f32 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v4f64 immAllZerosV), (AVX512_256_SET0)>;
}

// Alias instructions that map fld0 to xorps for SSE or vxorps for AVX.
// This is expanded by ExpandPostRAPseudos.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in {
  def AVX512_FsFLD0SH : I<0, Pseudo, (outs FR16X:$dst), (ins), "",
                          [(set FR16X:$dst, fp16imm0)]>;
  def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
                          [(set FR32X:$dst, fp32imm0)]>;
  def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
                          [(set FR64X:$dst, fp64imm0)]>;
  def AVX512_FsFLD0F128 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
                            [(set VR128X:$dst, fp128imm0)]>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - VECTOR INSERT
//

// Supports two different pattern operators for mask and unmasked ops. Allows
// null_frag to be passed for one.
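// The mnemonic below is assembled as "vinsert" # EltTypeName # "x" # NumElts;
// e.g. for From = v4f32 this spells "vinsertf32x4", as in the illustrative
//   vinsertf32x4 $1, %xmm1, %zmm2, %zmm0 {%k1}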
multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From,
                                  X86VectorVTInfo To,
                                  SDPatternOperator vinsert_insert,
                                  SDPatternOperator vinsert_for_mask,
                                  X86FoldableSchedWrite sched> {
  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    defm rr : AVX512_maskable_split<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
                   (ins To.RC:$src1, From.RC:$src2, u8imm:$src3),
                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
                   "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
                                         (From.VT From.RC:$src2),
                                         (iPTR imm)),
                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
                                           (From.VT From.RC:$src2),
                                           (iPTR imm))>,
                   AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
    let mayLoad = 1 in
    defm rm : AVX512_maskable_split<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
                   (ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3),
                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
                   "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
                                         (From.VT (From.LdFrag addr:$src2)),
                                         (iPTR imm)),
                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
                                           (From.VT (From.LdFrag addr:$src2)),
                                           (iPTR imm))>, AVX512AIi8Base, EVEX_4V,
                   EVEX_CD8<From.EltSize, From.CD8TupleForm>,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Passes the same pattern operator for masked and unmasked ops.
multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From,
                            X86VectorVTInfo To,
                            SDPatternOperator vinsert_insert,
                            X86FoldableSchedWrite sched> :
  vinsert_for_size_split<Opcode, From, To, vinsert_insert, vinsert_insert, sched>;

multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                                     X86VectorVTInfo To, PatFrag vinsert_insert,
                                     SDNodeXForm INSERT_get_vinsert_imm, list<Predicate> p> {
  let Predicates = p in {
    def : Pat<(vinsert_insert:$ins
                   (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rr")
                     To.RC:$src1, From.RC:$src2,
                     (INSERT_get_vinsert_imm To.RC:$ins)))>;

    def : Pat<(vinsert_insert:$ins
                   (To.VT To.RC:$src1),
                   (From.VT (From.LdFrag addr:$src2)),
                   (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rm")
                     To.RC:$src1, addr:$src2,
                     (INSERT_get_vinsert_imm To.RC:$ins)))>;
  }
}

multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
                            ValueType EltVT64, int Opcode256,
                            X86FoldableSchedWrite sched> {

  let Predicates = [HasVLX] in
    defm NAME # "32x4Z256" : vinsert_for_size<Opcode128,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 vinsert128_insert, sched>, EVEX_V256;

  defm NAME # "32x4Z" : vinsert_for_size<Opcode128,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 vinsert128_insert, sched>, EVEX_V512;

  defm NAME # "64x4Z" : vinsert_for_size<Opcode256,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 vinsert256_insert, sched>, VEX_W, EVEX_V512;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasVLX, HasDQI] in
    defm NAME # "64x2Z256" : vinsert_for_size_split<Opcode128,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 null_frag, vinsert128_insert, sched>,
                                 VEX_W1X, EVEX_V256;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z" : vinsert_for_size_split<Opcode128,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 null_frag, vinsert128_insert, sched>,
                                 VEX_W, EVEX_V512;

    defm NAME # "32x8Z" : vinsert_for_size_split<Opcode256,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 null_frag, vinsert256_insert, sched>,
                                 EVEX_V512;
  }
}

// FIXME: Is there a better scheduler class for VINSERTF/VINSERTI?
defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a, WriteFShuffle256>;
defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a, WriteShuffle256>;

// Codegen patterns with the alternative types.
// Even with AVX512DQ we'll still use these for unmasked operations.
defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;

defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;

defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;

// Codegen patterns with the alternative types: insert VEC128 into VEC256.
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v8f16x_info, v16f16x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
// Codegen patterns with the alternative types: insert VEC128 into VEC512.
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v8f16x_info, v32f16_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
// Codegen patterns with the alternative types: insert VEC256 into VEC512.
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v16f16x_info, v32f16_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;


multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                 X86VectorVTInfo To, X86VectorVTInfo Cast,
                                 PatFrag vinsert_insert,
                                 SDNodeXForm INSERT_get_vinsert_imm,
                                 list<Predicate> p> {
let Predicates = p in {
  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT From.RC:$src2),
                                                 (iPTR imm))),
                           Cast.RC:$src0)),
            (!cast<Instruction>(InstrStr#"rrk")
             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT
                                                  (bitconvert
                                                   (From.LdFrag addr:$src2))),
                                                 (iPTR imm))),
                           Cast.RC:$src0)),
            (!cast<Instruction>(InstrStr#"rmk")
             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;

  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT From.RC:$src2),
                                                 (iPTR imm))),
                           Cast.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#"rrkz")
             Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT (From.LdFrag addr:$src2)),
                                                 (iPTR imm))),
                           Cast.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#"rmkz")
             Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
}
}

defm : vinsert_for_mask_cast<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
                             v8f32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4f32x_info, v8f32x_info,
                             v4f64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;

defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v4i32x_info, v8i32x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v8i16x_info, v16i16x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v16i8x_info, v32i8x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;

defm : vinsert_for_mask_cast<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
                             v16f32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z", v4f32x_info, v16f32_info,
                             v8f64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;

defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v4i32x_info, v16i32_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v8i16x_info, v32i16_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v16i8x_info, v64i8_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;

defm : vinsert_for_mask_cast<"VINSERTF32x8Z", v4f64x_info, v8f64_info,
                             v16f32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
                             v8f64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;

defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v4i64x_info, v8i64_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v16i16x_info, v32i16_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v32i8x_info, v64i8_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;

// vinsertps - insert f32 into XMM
let ExeDomain = SSEPackedSingle in {
let isCommutable = 1 in
def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
      (ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, timm:$src3))]>,
      EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
      (ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst, (X86insertps VR128X:$src1,
                          (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
                          timm:$src3))]>,
      EVEX_4V, EVEX_CD8<32, CD8VT1>,
      Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
}

//===----------------------------------------------------------------------===//
// AVX-512 VECTOR EXTRACT
//---

// Supports two different pattern operators for mask and unmasked ops. Allows
// null_frag to be passed for one.
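// This mirrors the insert case: the mnemonic is assembled from the destination
// info as "vextract" # To.EltTypeName # "x" # To.NumElts; e.g. for To = v4f32
// this spells "vextractf32x4", as in the illustrative
//   vextractf32x4 $1, %zmm0, %xmm1 {%k1}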
multiclass vextract_for_size_split<int Opcode,
                                   X86VectorVTInfo From, X86VectorVTInfo To,
                                   SDPatternOperator vextract_extract,
                                   SDPatternOperator vextract_for_mask,
                                   SchedWrite SchedRR, SchedWrite SchedMR> {

  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    defm rr : AVX512_maskable_split<Opcode, MRMDestReg, To, (outs To.RC:$dst),
                (ins From.RC:$src1, u8imm:$idx),
                "vextract" # To.EltTypeName # "x" # To.NumElts,
                "$idx, $src1", "$src1, $idx",
                (vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm)),
                (vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm))>,
                AVX512AIi8Base, EVEX, Sched<[SchedRR]>;

    def mr : AVX512AIi8<Opcode, MRMDestMem, (outs),
                    (ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx),
                    "vextract" # To.EltTypeName # "x" # To.NumElts #
                        "\t{$idx, $src1, $dst|$dst, $src1, $idx}",
                    [(store (To.VT (vextract_extract:$idx
                                    (From.VT From.RC:$src1), (iPTR imm))),
                             addr:$dst)]>, EVEX,
                    Sched<[SchedMR]>;

    let mayStore = 1, hasSideEffects = 0 in
    def mrk : AVX512AIi8<Opcode, MRMDestMem, (outs),
                         (ins To.MemOp:$dst, To.KRCWM:$mask,
                              From.RC:$src1, u8imm:$idx),
                         "vextract" # To.EltTypeName # "x" # To.NumElts #
                              "\t{$idx, $src1, $dst {${mask}}|"
                              "$dst {${mask}}, $src1, $idx}", []>,
                         EVEX_K, EVEX, Sched<[SchedMR]>, NotMemoryFoldable;
  }
}

// Passes the same pattern operator for masked and unmasked ops.
multiclass vextract_for_size<int Opcode, X86VectorVTInfo From,
                             X86VectorVTInfo To,
                             SDPatternOperator vextract_extract,
                             SchedWrite SchedRR, SchedWrite SchedMR> :
  vextract_for_size_split<Opcode, From, To, vextract_extract, vextract_extract, SchedRR, SchedMR>;

// Codegen patterns for the alternative types.
multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                                      X86VectorVTInfo To, PatFrag vextract_extract,
                                      SDNodeXForm EXTRACT_get_vextract_imm, list<Predicate> p> {
  let Predicates = p in {
    def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rr")
                          From.RC:$src1,
                          (EXTRACT_get_vextract_imm To.RC:$ext)))>;
    def : Pat<(store (To.VT (vextract_extract:$ext (From.VT From.RC:$src1),
                              (iPTR imm))), addr:$dst),
              (!cast<Instruction>(InstrStr#"mr") addr:$dst, From.RC:$src1,
                (EXTRACT_get_vextract_imm To.RC:$ext))>;
  }
}

multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
                             ValueType EltVT64, int Opcode256,
                             SchedWrite SchedRR, SchedWrite SchedMR> {
  let Predicates = [HasAVX512] in {
    defm NAME # "32x4Z" : vextract_for_size<Opcode128,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 vextract128_extract, SchedRR, SchedMR>,
                                 EVEX_V512, EVEX_CD8<32, CD8VT4>;
    defm NAME # "64x4Z" : vextract_for_size<Opcode256,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 vextract256_extract, SchedRR, SchedMR>,
                                 VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
  }
  let Predicates = [HasVLX] in
    defm NAME # "32x4Z256" : vextract_for_size<Opcode128,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 vextract128_extract, SchedRR, SchedMR>,
                                 EVEX_V256, EVEX_CD8<32, CD8VT4>;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasVLX, HasDQI] in
    defm NAME # "64x2Z256" : vextract_for_size_split<Opcode128,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 null_frag, vextract128_extract, SchedRR, SchedMR>,
                                 VEX_W1X, EVEX_V256, EVEX_CD8<64, CD8VT2>;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z" : vextract_for_size_split<Opcode128,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 null_frag, vextract128_extract, SchedRR, SchedMR>,
                                 VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
    defm NAME # "32x8Z" : vextract_for_size_split<Opcode256,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 null_frag, vextract256_extract, SchedRR, SchedMR>,
                                 EVEX_V512, EVEX_CD8<32, CD8VT8>;
  }
}

// TODO - replace WriteFStore/WriteVecStore with X86SchedWriteMoveLSWidths types.
defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b, WriteFShuffle256, WriteFStore>;
defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b, WriteShuffle256, WriteVecStore>;

// extract_subvector codegen patterns with the alternative types.
// Even with AVX512DQ we'll still use these for unmasked operations.
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;

defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;

defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;

// Codegen patterns with the alternative types: extract VEC128 from VEC256.
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v16f16x_info, v8f16x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;

// Codegen patterns with the alternative types: extract VEC128 from VEC512.
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v32f16_info, v8f16x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
// Codegen patterns with the alternative types: extract VEC256 from VEC512.
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v32f16_info, v16f16x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;


// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
let Predicates = [NoVLX] in {
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
          (v2i64 (VEXTRACTI128rr
                  (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
          (v2f64 (VEXTRACTF128rr
                  (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
          (v4i32 (VEXTRACTI128rr
                  (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
          (v4f32 (VEXTRACTF128rr
                  (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
          (v8i16 (VEXTRACTI128rr
                  (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8f16 (extract_subvector (v32f16 VR512:$src), (iPTR 8))),
          (v8f16 (VEXTRACTF128rr
                  (v16f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
          (v16i8 (VEXTRACTI128rr
                  (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
}

// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
let Predicates = [HasVLX] in {
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
          (v2i64 (VEXTRACTI32x4Z256rr
                  (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
          (v2f64 (VEXTRACTF32x4Z256rr
                  (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
          (v4i32 (VEXTRACTI32x4Z256rr
                  (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
          (v4f32 (VEXTRACTF32x4Z256rr
                  (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
          (v8i16 (VEXTRACTI32x4Z256rr
                  (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8f16 (extract_subvector (v32f16 VR512:$src), (iPTR 8))),
          (v8f16 (VEXTRACTF32x4Z256rr
                  (v16f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
          (v16i8 (VEXTRACTI32x4Z256rr
                  (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
}


// Additional patterns for handling a bitcast between the vselect and the
// extract_subvector.
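// Sketch of what these patterns catch: e.g. a v4i32 masked select wrapped
// around a bitcast of the v8i16 subvector extracted from a v32i16 source can
// still use VEXTRACTI32x4Z{rrk,rrkz}, since the k-mask applies at dword
// granularity regardless of the bitcast.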
multiclass vextract_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                  X86VectorVTInfo To, X86VectorVTInfo Cast,
                                  PatFrag vextract_extract,
                                  SDNodeXForm EXTRACT_get_vextract_imm,
                                  list<Predicate> p> {
let Predicates = p in {
  def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask,
                                   (bitconvert
                                    (To.VT (vextract_extract:$ext
                                            (From.VT From.RC:$src), (iPTR imm)))),
                                   To.RC:$src0)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
                      Cast.RC:$src0, Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;

  def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask,
                                   (bitconvert
                                    (To.VT (vextract_extract:$ext
                                            (From.VT From.RC:$src), (iPTR imm)))),
                                   Cast.ImmAllZerosV)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
                      Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;
}
}

defm : vextract_for_mask_cast<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
                              v4f32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z256", v8f32x_info, v4f32x_info,
                              v2f64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;

defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v8i32x_info, v4i32x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v16i16x_info, v8i16x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v32i8x_info, v16i8x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;

defm : vextract_for_mask_cast<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
                              v4f32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z", v16f32_info, v4f32x_info,
                              v2f64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;

defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v16i32_info, v4i32x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v32i16_info, v8i16x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v64i8_info, v16i8x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;

defm : vextract_for_mask_cast<"VEXTRACTF32x8Z", v8f64_info, v4f64x_info,
                              v8f32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
                              v4f64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;

defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v8i64_info, v4i64x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v32i16_info, v16i16x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v64i8_info, v32i8x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;

// vextractps - extract 32 bits from XMM
def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32orGR64:$dst),
                      (ins VR128X:$src1, u8imm:$src2),
                      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(set GR32orGR64:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
                      EVEX, VEX_WIG, Sched<[WriteVecExtract]>;

def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
                      (ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
                      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
                              addr:$dst)]>,
                      EVEX, VEX_WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecExtractSt]>;

//===---------------------------------------------------------------------===//
// AVX-512 BROADCAST
//---
// broadcast with a scalar argument.
multiclass avx512_broadcast_scalar<string Name, X86VectorVTInfo DestInfo,
                                   X86VectorVTInfo SrcInfo> {
  def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#rr)
             (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
  def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask,
                                       (X86VBroadcast SrcInfo.FRC:$src),
                                       DestInfo.RC:$src0)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#rrk)
             DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
             (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
  def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask,
                                       (X86VBroadcast SrcInfo.FRC:$src),
                                       DestInfo.ImmAllZerosV)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#rrkz)
             DestInfo.KRCWM:$mask, (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
}

// Split version to allow mask and broadcast node to be different types. This
// helps support the 32x2 broadcasts.
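// In the split form the select patterns use MaskInfo.VT while the broadcast is
// matched at DestInfo.VT, with a bitconvert reconciling the two views; the
// 32x2 broadcasts are the motivating case where those types differ.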
multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
                                     SchedWrite SchedRR, SchedWrite SchedRM,
                                     X86VectorVTInfo MaskInfo,
                                     X86VectorVTInfo DestInfo,
                                     X86VectorVTInfo SrcInfo,
                                     bit IsConvertibleToThreeAddress,
                                     SDPatternOperator UnmaskedOp = X86VBroadcast,
                                     SDPatternOperator UnmaskedBcastOp = SrcInfo.BroadcastLdFrag> {
  let hasSideEffects = 0 in
  def rr : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst), (ins SrcInfo.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set MaskInfo.RC:$dst,
                      (MaskInfo.VT
                       (bitconvert
                        (DestInfo.VT
                         (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))))],
                    DestInfo.ExeDomain>, T8PD, EVEX, Sched<[SchedRR]>;
  def rrkz : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
                      (ins MaskInfo.KRCWM:$mask, SrcInfo.RC:$src),
                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
                                 "${dst} {${mask}} {z}, $src}"),
                      [(set MaskInfo.RC:$dst,
                        (vselect_mask MaskInfo.KRCWM:$mask,
                         (MaskInfo.VT
                          (bitconvert
                           (DestInfo.VT
                            (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
                         MaskInfo.ImmAllZerosV))],
                      DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ, Sched<[SchedRR]>;
  let Constraints = "$src0 = $dst" in
  def rrk : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
                     (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
                          SrcInfo.RC:$src),
                     !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
                                "${dst} {${mask}}, $src}"),
                     [(set MaskInfo.RC:$dst,
                       (vselect_mask MaskInfo.KRCWM:$mask,
                        (MaskInfo.VT
                         (bitconvert
                          (DestInfo.VT
                           (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
                        MaskInfo.RC:$src0))],
                     DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K, Sched<[SchedRR]>;

  let hasSideEffects = 0, mayLoad = 1 in
  def rm : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
                    (ins SrcInfo.ScalarMemOp:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set MaskInfo.RC:$dst,
                      (MaskInfo.VT
                       (bitconvert
                        (DestInfo.VT
                         (UnmaskedBcastOp addr:$src)))))],
                    DestInfo.ExeDomain>, T8PD, EVEX,
                    EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;

  def rmkz : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
                      (ins MaskInfo.KRCWM:$mask, SrcInfo.ScalarMemOp:$src),
                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
                                 "${dst} {${mask}} {z}, $src}"),
                      [(set MaskInfo.RC:$dst,
                        (vselect_mask MaskInfo.KRCWM:$mask,
                         (MaskInfo.VT
                          (bitconvert
                           (DestInfo.VT
                            (SrcInfo.BroadcastLdFrag addr:$src)))),
                         MaskInfo.ImmAllZerosV))],
                      DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ,
                      EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;

  let Constraints = "$src0 = $dst",
      isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
  def rmk : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
                     (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
                          SrcInfo.ScalarMemOp:$src),
                     !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
                                "${dst} {${mask}}, $src}"),
                     [(set MaskInfo.RC:$dst,
                       (vselect_mask MaskInfo.KRCWM:$mask,
                        (MaskInfo.VT
                         (bitconvert
                          (DestInfo.VT
                           (SrcInfo.BroadcastLdFrag addr:$src)))),
                        MaskInfo.RC:$src0))],
                     DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K,
                     EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
}

// Helper class to force mask and broadcast result to the same type.

// Helper class to force mask and broadcast result to same type.
multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr,
                               SchedWrite SchedRR, SchedWrite SchedRM,
                               X86VectorVTInfo DestInfo,
                               X86VectorVTInfo SrcInfo,
                               bit IsConvertibleToThreeAddress> :
  avx512_broadcast_rm_split<opc, OpcodeStr, SchedRR, SchedRM,
                            DestInfo, DestInfo, SrcInfo,
                            IsConvertibleToThreeAddress>;

multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
                                 WriteFShuffle256Ld, _.info512, _.info128, 1>,
             avx512_broadcast_scalar<NAME, _.info512, _.info128>,
             EVEX_V512;
  }

  let Predicates = [HasVLX] in {
    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
                                    WriteFShuffle256Ld, _.info256, _.info128, 1>,
                avx512_broadcast_scalar<NAME, _.info256, _.info128>,
                EVEX_V256;
  }
}

multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
                                 WriteFShuffle256Ld, _.info512, _.info128, 1>,
             avx512_broadcast_scalar<NAME, _.info512, _.info128>,
             EVEX_V512;
  }

  let Predicates = [HasVLX] in {
    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
                                    WriteFShuffle256Ld, _.info256, _.info128, 1>,
                avx512_broadcast_scalar<NAME, _.info256, _.info128>,
                EVEX_V256;
    defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
                                    WriteFShuffle256Ld, _.info128, _.info128, 1>,
                avx512_broadcast_scalar<NAME, _.info128, _.info128>,
                EVEX_V128;
  }
}
defm VBROADCASTSS : avx512_fp_broadcast_ss<0x18, "vbroadcastss",
                                           avx512vl_f32_info>;
defm VBROADCASTSD : avx512_fp_broadcast_sd<0x19, "vbroadcastsd",
                                           avx512vl_f64_info>, VEX_W1X;

multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR,
                                    X86VectorVTInfo _, SDPatternOperator OpNode,
                                    RegisterClass SrcRC> {
  // Fold with a mask even if it has multiple uses since it is cheap.
  let ExeDomain = _.ExeDomain in
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins SrcRC:$src),
                            "vpbroadcast"#_.Suffix, "$src", "$src",
                            (_.VT (OpNode SrcRC:$src)), /*IsCommutable*/0,
                            /*IsKCommutable*/0, /*IsKZCommutable*/0, vselect>,
           T8PD, EVEX, Sched<[SchedRR]>;
}
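
// Illustrative sketch (comment only): avx512_int_broadcast_reg produces the
// GPR-source forms instantiated further below, e.g. VPBROADCASTDrZrr for
//   (v16i32 (X86VBroadcast GR32:$src))
// which assembles as "vpbroadcastd zmm0, eax" (plus the usual {k}/{k}{z}
// masked variants generated by AVX512_maskable).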

multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite SchedRR,
                                      X86VectorVTInfo _, SDPatternOperator OpNode,
                                      RegisterClass SrcRC, SubRegIndex Subreg> {
  let hasSideEffects = 0, ExeDomain = _.ExeDomain in
  defm rr : AVX512_maskable_custom<opc, MRMSrcReg,
                                   (outs _.RC:$dst), (ins GR32:$src),
                                   !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
                                   !con((ins _.KRCWM:$mask), (ins GR32:$src)),
                                   "vpbroadcast"#_.Suffix, "$src", "$src", [], [], [],
                                   "$src0 = $dst">, T8PD, EVEX, Sched<[SchedRR]>;

  def : Pat <(_.VT (OpNode SrcRC:$src)),
             (!cast<Instruction>(Name#rr)
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;

  // Fold with a mask even if it has multiple uses since it is cheap.
  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
             (!cast<Instruction>(Name#rrk) _.RC:$src0, _.KRCWM:$mask,
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;

  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV),
             (!cast<Instruction>(Name#rrkz) _.KRCWM:$mask,
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
}

multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name,
                                         AVX512VLVectorVTInfo _, SDPatternOperator OpNode,
                                         RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, WriteShuffle256, _.info512,
                                        OpNode, SrcRC, Subreg>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, WriteShuffle256,
                                           _.info256, OpNode, SrcRC, Subreg>, EVEX_V256;
    defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, WriteShuffle,
                                           _.info128, OpNode, SrcRC, Subreg>, EVEX_V128;
  }
}

multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
                                       SDPatternOperator OpNode,
                                       RegisterClass SrcRC, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info512, OpNode,
                                      SrcRC>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info256, OpNode,
                                         SrcRC>, EVEX_V256;
    defm Z128 : avx512_int_broadcast_reg<opc, WriteShuffle, _.info128, OpNode,
                                         SrcRC>, EVEX_V128;
  }
}

defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr",
                       avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>;
defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr",
                       avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit,
                       HasBWI>;
defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
                                                 X86VBroadcast, GR32, HasAVX512>;
defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
                                                 X86VBroadcast, GR64, HasAVX512>, VEX_W;

multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
                                      AVX512VLVectorVTInfo _, Predicate prd,
                                      bit IsConvertibleToThreeAddress> {
  let Predicates = [prd] in {
    defm Z : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle256,
                                 WriteShuffle256Ld, _.info512, _.info128,
                                 IsConvertibleToThreeAddress>,
             EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle256,
                                    WriteShuffle256Ld, _.info256, _.info128,
                                    IsConvertibleToThreeAddress>,
                EVEX_V256;
    defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle,
                                    WriteShuffleXLd, _.info128, _.info128,
                                    IsConvertibleToThreeAddress>,
                EVEX_V128;
  }
}

defm VPBROADCASTB : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
                                               avx512vl_i8_info, HasBWI, 0>;
defm VPBROADCASTW : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
                                               avx512vl_i16_info, HasBWI, 0>;
defm VPBROADCASTD : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
                                               avx512vl_i32_info, HasAVX512, 1>;
defm VPBROADCASTQ : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
                                               avx512vl_i64_info, HasAVX512, 1>, VEX_W1X;
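
// Illustrative sketch (comment only): the rm forms above fold a scalar load
// and replicate it, e.g.
//   vpbroadcastd zmm0 {k1}, dword ptr [rax]
// loads one dword and writes it to every dword element enabled in k1. The
// byte/word GPR variants instead go through INSERT_SUBREG to widen the
// GR8/GR16 source to GR32, since only a 32-bit GPR encoding exists.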

multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
                                      SDPatternOperator OpNode,
                                      X86VectorVTInfo _Dst,
                                      X86VectorVTInfo _Src> {
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
                            (_Dst.VT (OpNode addr:$src))>,
            Sched<[SchedWriteShuffle.YMM.Folded]>,
            AVX5128IBase, EVEX;
}

// This should be used for the AVX512DQ broadcast instructions. It disables
// the unmasked patterns so that we only use the DQ instructions when masking
// is requested.
multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
                                         SDPatternOperator OpNode,
                                         X86VectorVTInfo _Dst,
                                         X86VectorVTInfo _Src> {
  let hasSideEffects = 0, mayLoad = 1 in
  defm rm : AVX512_maskable_split<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                                  (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
                                  (null_frag),
                                  (_Dst.VT (OpNode addr:$src))>,
            Sched<[SchedWriteShuffle.YMM.Folded]>,
            AVX5128IBase, EVEX;
}
let Predicates = [HasBWI] in {
  def : Pat<(v32f16 (X86VBroadcastld16 addr:$src)),
            (VPBROADCASTWZrm addr:$src)>;

  def : Pat<(v32f16 (X86VBroadcast (v8f16 VR128X:$src))),
            (VPBROADCASTWZrr VR128X:$src)>;
  def : Pat<(v32f16 (X86VBroadcast (f16 FR16X:$src))),
            (VPBROADCASTWZrr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
}
let Predicates = [HasVLX, HasBWI] in {
  def : Pat<(v8f16 (X86VBroadcastld16 addr:$src)),
            (VPBROADCASTWZ128rm addr:$src)>;
  def : Pat<(v16f16 (X86VBroadcastld16 addr:$src)),
            (VPBROADCASTWZ256rm addr:$src)>;

  def : Pat<(v8f16 (X86VBroadcast (v8f16 VR128X:$src))),
            (VPBROADCASTWZ128rr VR128X:$src)>;
  def : Pat<(v16f16 (X86VBroadcast (v8f16 VR128X:$src))),
            (VPBROADCASTWZ256rr VR128X:$src)>;

  def : Pat<(v8f16 (X86VBroadcast (f16 FR16X:$src))),
            (VPBROADCASTWZ128rr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
  def : Pat<(v16f16 (X86VBroadcast (f16 FR16X:$src))),
            (VPBROADCASTWZ256rr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
}

//===----------------------------------------------------------------------===//
// AVX-512 BROADCAST SUBVECTORS
//

defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
                         X86SubVBroadcastld128, v16i32_info, v4i32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
                         X86SubVBroadcastld128, v16f32_info, v4f32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
                         X86SubVBroadcastld256, v8i64_info, v4i64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT4>;
defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
                         X86SubVBroadcastld256, v8f64_info, v4f64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT4>;
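
// Illustrative sketch (comment only): X86SubVBroadcastld128/256 model a
// 128-/256-bit load replicated across the destination, e.g.
//   vbroadcastf32x4 zmm0, xmmword ptr [rax]
// fills all four 128-bit lanes of zmm0 with the same 16-byte memory value.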

let Predicates = [HasAVX512] in {
def : Pat<(v8f64 (X86SubVBroadcastld256 addr:$src)),
          (VBROADCASTF64X4rm addr:$src)>;
def : Pat<(v16f32 (X86SubVBroadcastld256 addr:$src)),
          (VBROADCASTF64X4rm addr:$src)>;
def : Pat<(v32f16 (X86SubVBroadcastld256 addr:$src)),
          (VBROADCASTF64X4rm addr:$src)>;
def : Pat<(v8i64 (X86SubVBroadcastld256 addr:$src)),
          (VBROADCASTI64X4rm addr:$src)>;
def : Pat<(v16i32 (X86SubVBroadcastld256 addr:$src)),
          (VBROADCASTI64X4rm addr:$src)>;
def : Pat<(v32i16 (X86SubVBroadcastld256 addr:$src)),
          (VBROADCASTI64X4rm addr:$src)>;
def : Pat<(v64i8 (X86SubVBroadcastld256 addr:$src)),
          (VBROADCASTI64X4rm addr:$src)>;

def : Pat<(v8f64 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF32X4rm addr:$src)>;
def : Pat<(v16f32 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF32X4rm addr:$src)>;
def : Pat<(v32f16 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF32X4rm addr:$src)>;
def : Pat<(v8i64 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4rm addr:$src)>;
def : Pat<(v16i32 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4rm addr:$src)>;
def : Pat<(v32i16 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4rm addr:$src)>;
def : Pat<(v64i8 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4rm addr:$src)>;

// Patterns for selects of bitcasted operations.
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))),
                        (v16f32 immAllZerosV)),
          (VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))),
                        VR512:$src0),
          (VBROADCASTF32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))),
                        (v16i32 immAllZerosV)),
          (VBROADCASTI32X4rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))),
                        VR512:$src0),
          (VBROADCASTI32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;

def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))),
                        (v8f64 immAllZerosV)),
          (VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))),
                        VR512:$src0),
          (VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))),
                        (v8i64 immAllZerosV)),
          (VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))),
                        VR512:$src0),
          (VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}

let Predicates = [HasVLX] in {
defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
                             X86SubVBroadcastld128, v8i32x_info, v4i32x_info>,
                           EVEX_V256, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
                             X86SubVBroadcastld128, v8f32x_info, v4f32x_info>,
                           EVEX_V256, EVEX_CD8<32, CD8VT4>;

def : Pat<(v4f64 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF32X4Z256rm addr:$src)>;
def : Pat<(v8f32 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF32X4Z256rm addr:$src)>;
def : Pat<(v16f16 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF32X4Z256rm addr:$src)>;
def : Pat<(v4i64 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4Z256rm addr:$src)>;
def : Pat<(v8i32 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4Z256rm addr:$src)>;
def : Pat<(v16i16 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4Z256rm addr:$src)>;
def : Pat<(v32i8 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4Z256rm addr:$src)>;

// Patterns for selects of bitcasted operations.
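// Illustrative sketch (comment only): these handle DAGs where the mask
// granularity differs from the natural type of the broadcast, e.g.
//   (vselect_mask VK8WM:$mask,
//                 (bc_v8f32 (v4f64 (X86SubVBroadcastld128 addr:$src))),
//                 (v8f32 immAllZerosV))
// still selects VBROADCASTF32X4Z256rmkz even though the load was typed v4f64.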
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8f32 (v4f64 (X86SubVBroadcastld128 addr:$src))),
                        (v8f32 immAllZerosV)),
          (VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8f32 (v4f64 (X86SubVBroadcastld128 addr:$src))),
                        VR256X:$src0),
          (VBROADCASTF32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8i32 (v4i64 (X86SubVBroadcastld128 addr:$src))),
                        (v8i32 immAllZerosV)),
          (VBROADCASTI32X4Z256rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8i32 (v4i64 (X86SubVBroadcastld128 addr:$src))),
                        VR256X:$src0),
          (VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
}

let Predicates = [HasVLX, HasDQI] in {
defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
                             X86SubVBroadcastld128, v4i64x_info, v2i64x_info>, VEX_W1X,
                           EVEX_V256, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
                             X86SubVBroadcastld128, v4f64x_info, v2f64x_info>, VEX_W1X,
                           EVEX_V256, EVEX_CD8<64, CD8VT2>;

// Patterns for selects of bitcasted operations.
def : Pat<(vselect_mask VK4WM:$mask,
                        (bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))),
                        (v4f64 immAllZerosV)),
          (VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK4WM:$mask,
                        (bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))),
                        VR256X:$src0),
          (VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK4WM:$mask,
                        (bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))),
                        (v4i64 immAllZerosV)),
          (VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK4WM:$mask,
                        (bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))),
                        VR256X:$src0),
          (VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
}

let Predicates = [HasDQI] in {
defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
                         X86SubVBroadcastld128, v8i64_info, v2i64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
                         X86SubVBroadcastld256, v16i32_info, v8i32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT8>;
defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
                         X86SubVBroadcastld128, v8f64_info, v2f64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
                         X86SubVBroadcastld256, v16f32_info, v8f32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT8>;
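
// Illustrative sketch (comment only): because the _dq multiclasses pass
// null_frag as the unmasked pattern, a plain
//   (v8f64 (X86SubVBroadcastld128 addr:$src))
// keeps selecting VBROADCASTF32X4rm (AVX512F), and the DQ-only
// VBROADCASTF64X2 encodings are chosen only by the masked patterns below,
// where the mask granularity must match the f64/i64 element type.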

// Patterns for selects of bitcasted operations.
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))),
                        (v16f32 immAllZerosV)),
          (VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))),
                        VR512:$src0),
          (VBROADCASTF32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))),
                        (v16i32 immAllZerosV)),
          (VBROADCASTI32X8rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))),
                        VR512:$src0),
          (VBROADCASTI32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;

def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))),
                        (v8f64 immAllZerosV)),
          (VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))),
                        VR512:$src0),
          (VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))),
                        (v8i64 immAllZerosV)),
          (VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))),
                        VR512:$src0),
          (VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}

multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
                                        AVX512VLVectorVTInfo _Dst,
                                        AVX512VLVectorVTInfo _Src> {
  let Predicates = [HasDQI] in
    defm Z : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle256,
                                       WriteShuffle256Ld, _Dst.info512,
                                       _Src.info512, _Src.info128, 0, null_frag, null_frag>,
             EVEX_V512;
  let Predicates = [HasDQI, HasVLX] in
    defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle256,
                                          WriteShuffle256Ld, _Dst.info256,
                                          _Src.info256, _Src.info128, 0, null_frag, null_frag>,
                EVEX_V256;
}

multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
                                         AVX512VLVectorVTInfo _Dst,
                                         AVX512VLVectorVTInfo _Src> :
  avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> {

  let Predicates = [HasDQI, HasVLX] in
    defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle,
                                          WriteShuffleXLd, _Dst.info128,
                                          _Src.info128, _Src.info128, 0, null_frag, null_frag>,
                EVEX_V128;
}

defm VBROADCASTI32X2 : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
                         avx512vl_i32_info, avx512vl_i64_info>;
defm VBROADCASTF32X2 : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
                         avx512vl_f32_info, avx512vl_f64_info>;

//===----------------------------------------------------------------------===//
// AVX-512 BROADCAST MASK TO VECTOR REGISTER
//---
multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr,
                                  X86VectorVTInfo _, RegisterClass KRC> {
  def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.RC:$dst), (ins KRC:$src),
                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                      [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))]>,
           EVEX, Sched<[WriteShuffle]>;
}
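
// Illustrative sketch (comment only): X86VBroadcastm zero-extends the mask
// register and replicates it to every element, e.g.
//   vpbroadcastmb2q zmm0, k1
// writes zext(k1[7:0]) into each of the eight qword elements of zmm0.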

multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
                                 AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> {
  let Predicates = [HasCDI] in
    defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>, EVEX_V512;
  let Predicates = [HasCDI, HasVLX] in {
    defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>, EVEX_V256;
    defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>, EVEX_V128;
  }
}

defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
                                             avx512vl_i32_info, VK16>;
defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
                                             avx512vl_i64_info, VK8>, VEX_W;

//===----------------------------------------------------------------------===//
// -- VPERMI2 - 3 source operands form --
multiclass avx512_perm_i<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched,
                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
    hasSideEffects = 0 in {
  defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst),
           (ins _.RC:$src2, _.RC:$src3),
           OpcodeStr, "$src3, $src2", "$src2, $src3",
           (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, _.RC:$src3)), 1>,
           EVEX_4V, AVX5128IBase, Sched<[sched]>;

  let mayLoad = 1 in
  defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
           (ins _.RC:$src2, _.MemOp:$src3),
           OpcodeStr, "$src3, $src2", "$src2, $src3",
           (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1,
                  (_.VT (_.LdFrag addr:$src3)))), 1>,
           EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched,
                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
      hasSideEffects = 0, mayLoad = 1 in
  defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
            (ins _.RC:$src2, _.ScalarMemOp:$src3),
            OpcodeStr, !strconcat("${src3}", _.BroadcastStr, ", $src2"),
            !strconcat("$src2, ${src3}", _.BroadcastStr),
            (_.VT (X86VPermt2 _.RC:$src2,
                   IdxVT.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
            AVX5128IBase, EVEX_4V, EVEX_B,
            Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr,
                               X86FoldableSchedWrite sched,
                               AVX512VLVectorVTInfo VTInfo,
                               AVX512VLVectorVTInfo ShuffleMask> {
  defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
                           ShuffleMask.info512>,
             avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info512,
                              ShuffleMask.info512>, EVEX_V512;
  let Predicates = [HasVLX] in {
    defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
                                 ShuffleMask.info128>,
                   avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info128,
                                    ShuffleMask.info128>, EVEX_V128;
    defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
                                 ShuffleMask.info256>,
                   avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info256,
                                    ShuffleMask.info256>, EVEX_V256;
  }
}
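
// Illustrative sketch (comment only): the VPERMI2 forms read from two data
// operands with $src1 as the (overwritten) index vector, e.g. for
//   vpermi2d zmm1, zmm2, zmm3
// each result dword is (idx[i] bit 4 ? zmm3 : zmm2)[idx[i] & 15], where idx
// is the original contents of zmm1.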

multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched,
                                  AVX512VLVectorVTInfo VTInfo,
                                  AVX512VLVectorVTInfo Idx,
                                  Predicate Prd> {
  let Predicates = [Prd] in
    defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
                             Idx.info512>, EVEX_V512;
  let Predicates = [Prd, HasVLX] in {
    defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
                                 Idx.info128>, EVEX_V128;
    defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
                                 Idx.info256>, EVEX_V256;
  }
}

defm VPERMI2D  : avx512_perm_i_sizes<0x76, "vpermi2d", WriteVarShuffle256,
                   avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMI2Q  : avx512_perm_i_sizes<0x76, "vpermi2q", WriteVarShuffle256,
                   avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMI2W  : avx512_perm_i_sizes_bw<0x75, "vpermi2w", WriteVarShuffle256,
                   avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
                 VEX_W, EVEX_CD8<16, CD8VF>;
defm VPERMI2B  : avx512_perm_i_sizes_bw<0x75, "vpermi2b", WriteVarShuffle256,
                   avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
                 EVEX_CD8<8, CD8VF>;
defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", WriteFVarShuffle256,
                   avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", WriteFVarShuffle256,
                   avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;

// Extra patterns to deal with extra bitcasts due to passthru and index being
// different types on the fp versions.
multiclass avx512_perm_i_lowering<string InstrStr, X86VectorVTInfo _,
                                  X86VectorVTInfo IdxVT,
                                  X86VectorVTInfo CastVT> {
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                                (X86VPermt2 (_.VT _.RC:$src2),
                                            (IdxVT.VT (bitconvert
                                                       (CastVT.VT _.RC:$src1))),
                                            _.RC:$src3),
                                (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
            (!cast<Instruction>(InstrStr#"rrk") _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, _.RC:$src3)>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                                (X86VPermt2 _.RC:$src2,
                                            (IdxVT.VT (bitconvert
                                                       (CastVT.VT _.RC:$src1))),
                                            (_.LdFrag addr:$src3)),
                                (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
            (!cast<Instruction>(InstrStr#"rmk") _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3)>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                                (X86VPermt2 _.RC:$src2,
                                            (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))),
                                            (_.BroadcastLdFrag addr:$src3)),
                                (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
            (!cast<Instruction>(InstrStr#"rmbk") _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3)>;
}

// TODO: Should we add more casts? The vXi64 case is common due to ABI.
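
// Illustrative sketch (comment only): the CastVT parameter covers DAGs where
// the passthru/index value was built with i64 elements (common when a value
// round-trips through the vXi64 ABI type), e.g. a v16f32 permute whose index
// arrives as
//   (v16i32 (bitconvert (v8i64 VR512:$src1)))
// still matches VPERMI2PSrrk through the patterns above.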
defm : avx512_perm_i_lowering<"VPERMI2PS", v16f32_info, v16i32_info, v8i64_info>;
defm : avx512_perm_i_lowering<"VPERMI2PS256", v8f32x_info, v8i32x_info, v4i64x_info>;
defm : avx512_perm_i_lowering<"VPERMI2PS128", v4f32x_info, v4i32x_info, v2i64x_info>;

// VPERMT2
multiclass avx512_perm_t<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched,
                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
  defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
           (ins IdxVT.RC:$src2, _.RC:$src3),
           OpcodeStr, "$src3, $src2", "$src2, $src3",
           (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)), 1>,
           EVEX_4V, AVX5128IBase, Sched<[sched]>;

  defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
           (ins IdxVT.RC:$src2, _.MemOp:$src3),
           OpcodeStr, "$src3, $src2", "$src2, $src3",
           (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
                  (_.LdFrag addr:$src3))), 1>,
           EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched,
                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
  defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
            (ins IdxVT.RC:$src2, _.ScalarMemOp:$src3),
            OpcodeStr, !strconcat("${src3}", _.BroadcastStr, ", $src2"),
            !strconcat("$src2, ${src3}", _.BroadcastStr),
            (_.VT (X86VPermt2 _.RC:$src1,
                   IdxVT.RC:$src2, (_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
            AVX5128IBase, EVEX_4V, EVEX_B,
            Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr,
                               X86FoldableSchedWrite sched,
                               AVX512VLVectorVTInfo VTInfo,
                               AVX512VLVectorVTInfo ShuffleMask> {
  defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
                           ShuffleMask.info512>,
             avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info512,
                              ShuffleMask.info512>, EVEX_V512;
  let Predicates = [HasVLX] in {
    defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
                                 ShuffleMask.info128>,
                   avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info128,
                                    ShuffleMask.info128>, EVEX_V128;
    defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
                                 ShuffleMask.info256>,
                   avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info256,
                                    ShuffleMask.info256>, EVEX_V256;
  }
}

multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched,
                                  AVX512VLVectorVTInfo VTInfo,
                                  AVX512VLVectorVTInfo Idx, Predicate Prd> {
  let Predicates = [Prd] in
    defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
                             Idx.info512>, EVEX_V512;
  let Predicates = [Prd, HasVLX] in {
    defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
                                 Idx.info128>, EVEX_V128;
    defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
                                 Idx.info256>, EVEX_V256;
  }
}

defm VPERMT2D  : avx512_perm_t_sizes<0x7E, "vpermt2d", WriteVarShuffle256,
                   avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMT2Q  : avx512_perm_t_sizes<0x7E, "vpermt2q", WriteVarShuffle256,
                   avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
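
// Illustrative sketch (comment only): VPERMT2 is the companion encoding that
// overwrites the first data table rather than the indices, e.g. for
//   vpermt2d zmm1, zmm2, zmm3
// each result dword is (zmm2[i] bit 4 ? zmm3 : old zmm1)[zmm2[i] & 15], and
// the index vector zmm2 is left intact.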
defm VPERMT2W  : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", WriteVarShuffle256,
                   avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
                 VEX_W, EVEX_CD8<16, CD8VF>;
defm VPERMT2B  : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", WriteVarShuffle256,
                   avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
                 EVEX_CD8<8, CD8VF>;
defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", WriteFVarShuffle256,
                   avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", WriteFVarShuffle256,
                   avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;

//===----------------------------------------------------------------------===//
// AVX-512 - BLEND using mask
//

multiclass WriteFVarBlendask<bits<8> opc, string OpcodeStr,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.RC:$src2),
                    !strconcat(OpcodeStr,
                               "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), []>,
           EVEX_4V, Sched<[sched]>;
  def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
                     (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
                     !strconcat(OpcodeStr,
                                "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
                     []>, EVEX_4V, EVEX_K, Sched<[sched]>;
  def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
                      (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
                      !strconcat(OpcodeStr,
                                 "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
                      []>, EVEX_4V, EVEX_KZ, Sched<[sched]>, NotMemoryFoldable;
  let mayLoad = 1 in {
  def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.MemOp:$src2),
                    !strconcat(OpcodeStr,
                               "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
                    []>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
                     (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
                     !strconcat(OpcodeStr,
                                "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
                     []>, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>,
                     Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
                      (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
                      !strconcat(OpcodeStr,
                                 "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
                      []>, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>,
                      Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
  }
  }
}
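
// Illustrative sketch (comment only): these defs carry empty pattern lists
// (selection happens elsewhere), but semantically the blends are mask-driven
// selects, e.g.
//   vblendmps zmm0 {k1}, zmm1, zmm2
// sets zmm0[i] = k1[i] ? zmm2[i] : zmm1[i]; the {z} form writes zero instead
// of zmm1[i] when the mask bit is clear.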

multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in {
  def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
                      (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
                      !strconcat(OpcodeStr,
                                 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
                                 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
             EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[sched.Folded, sched.ReadAfterFold]>;

  def rmbkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
                       (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
                       !strconcat(OpcodeStr,
                                  "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}} {z}|",
                                  "$dst {${mask}} {z}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
              EVEX_4V, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
              Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;

  def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
                     (ins _.RC:$src1, _.ScalarMemOp:$src2),
                     !strconcat(OpcodeStr,
                                "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
                                "$dst, $src1, ${src2}", _.BroadcastStr, "}"), []>,
            EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
            Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass blendmask_dq<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
                        AVX512VLVectorVTInfo VTInfo> {
  defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
           WriteFVarBlendask_rmb<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
           EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
                WriteFVarBlendask_rmb<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
                WriteFVarBlendask_rmb<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
                EVEX_V128;
  }
}

multiclass blendmask_bw<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
                        AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasBWI] in
    defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
             EVEX_V512;

  let Predicates = [HasBWI, HasVLX] in {
    defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
                EVEX_V128;
  }
}

defm VBLENDMPS : blendmask_dq<0x65, "vblendmps", SchedWriteFVarBlend,
                              avx512vl_f32_info>;
defm VBLENDMPD : blendmask_dq<0x65, "vblendmpd", SchedWriteFVarBlend,
                              avx512vl_f64_info>, VEX_W;
defm VPBLENDMD : blendmask_dq<0x64, "vpblendmd", SchedWriteVarBlend,
                              avx512vl_i32_info>;
defm VPBLENDMQ : blendmask_dq<0x64, "vpblendmq", SchedWriteVarBlend,
                              avx512vl_i64_info>, VEX_W;
defm VPBLENDMB : blendmask_bw<0x66, "vpblendmb", SchedWriteVarBlend,
                              avx512vl_i8_info>;
defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", SchedWriteVarBlend,
                              avx512vl_i16_info>, VEX_W;

//===----------------------------------------------------------------------===//
// Compare Instructions
//===----------------------------------------------------------------------===//

// avx512_cmp_scalar - AVX512 CMPSS and CMPSD

multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
                             PatFrag OpNode_su, PatFrag OpNodeSAE_su,
                             X86FoldableSchedWrite sched> {
  defm rr_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                  (outs _.KRC:$dst),
                  (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                  "vcmp"#_.Suffix,
                  "$cc, $src2, $src1", "$src1, $src2, $cc",
                  (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
                  (OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                             timm:$cc)>, EVEX_4V, VEX_LIG, Sched<[sched]>, SIMD_EXC;
  let mayLoad = 1 in
  defm rm_Int : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
                  (outs _.KRC:$dst),
                  (ins _.RC:$src1, _.IntScalarMemOp:$src2, u8imm:$cc),
                  "vcmp"#_.Suffix,
                  "$cc, $src2, $src1", "$src1, $src2, $cc",
                  (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
                          timm:$cc),
                  (OpNode_su (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
                             timm:$cc)>, EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
                Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;

  let Uses = [MXCSR] in
  defm rrb_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                   (outs _.KRC:$dst),
                   (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                   "vcmp"#_.Suffix,
                   "$cc, {sae}, $src2, $src1", "$src1, $src2, {sae}, $cc",
                   (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                              timm:$cc),
                   (OpNodeSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                 timm:$cc)>,
                 EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;

  let isCodeGenOnly = 1 in {
    let isCommutable = 1 in
    def rr : AVX512Ii8<0xC2, MRMSrcReg,
                       (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, u8imm:$cc),
                       !strconcat("vcmp", _.Suffix,
                                  "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
                       [(set _.KRC:$dst, (OpNode _.FRC:$src1,
                                                 _.FRC:$src2,
                                                 timm:$cc))]>,
             EVEX_4V, VEX_LIG, Sched<[sched]>, SIMD_EXC;
    def rm : AVX512Ii8<0xC2, MRMSrcMem,
                       (outs _.KRC:$dst),
                       (ins _.FRC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
                       !strconcat("vcmp", _.Suffix,
                                  "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
                       [(set _.KRC:$dst, (OpNode _.FRC:$src1,
                                                 (_.ScalarLdFrag addr:$src2),
                                                 timm:$cc))]>,
             EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
             Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  }
}

def X86cmpms_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                          (X86cmpms node:$src1, node:$src2, node:$cc), [{
  return N->hasOneUse();
}]>;
def X86cmpmsSAE_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                             (X86cmpmsSAE node:$src1, node:$src2, node:$cc), [{
  return N->hasOneUse();
}]>;

let Predicates = [HasAVX512] in {
  let ExeDomain = SSEPackedSingle in
  defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsSAE,
                                   X86cmpms_su, X86cmpmsSAE_su,
                                   SchedWriteFCmp.Scl>, AVX512XSIi8Base;
  let ExeDomain = SSEPackedDouble in
  defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsSAE,
                                   X86cmpms_su, X86cmpmsSAE_su,
                                   SchedWriteFCmp.Scl>, AVX512XDIi8Base, VEX_W;
}
let Predicates = [HasFP16], ExeDomain = SSEPackedSingle in
  defm VCMPSHZ : avx512_cmp_scalar<f16x_info, X86cmpms, X86cmpmsSAE,
                                   X86cmpms_su, X86cmpmsSAE_su,
                                   SchedWriteFCmp.Scl>, AVX512XSIi8Base, TA;

multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr,
                              X86FoldableSchedWrite sched,
                              X86VectorVTInfo _, bit IsCommutable> {
  let isCommutable = IsCommutable, hasSideEffects = 0 in
  def rr : AVX512BI<opc, MRMSrcReg,
                    (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
                    !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    []>, EVEX_4V, Sched<[sched]>;
  let mayLoad = 1, hasSideEffects = 0 in
  def rm : AVX512BI<opc, MRMSrcMem,
                    (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
                    !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    []>, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  let isCommutable = IsCommutable, hasSideEffects = 0 in
  def rrk : AVX512BI<opc, MRMSrcReg,
                     (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
                     !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
                                "$dst {${mask}}, $src1, $src2}"),
                     []>, EVEX_4V, EVEX_K, Sched<[sched]>;
  let mayLoad = 1, hasSideEffects = 0 in
  def rmk : AVX512BI<opc, MRMSrcMem,
                     (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
                     !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
                                "$dst {${mask}}, $src1, $src2}"),
                     []>, EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
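
// Illustrative sketch (comment only): the packed integer compares write a
// mask register instead of a vector, e.g.
//   vpcmpeqd k1 {k2}, zmm0, zmm1
// computes k1[i] = k2[i] && (zmm0[i] == zmm1[i]) across all 16 dword lanes.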

multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                  bit IsCommutable> :
           avx512_icmp_packed<opc, OpcodeStr, sched, _, IsCommutable> {
  let mayLoad = 1, hasSideEffects = 0 in {
  def rmb : AVX512BI<opc, MRMSrcMem,
                     (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
                     !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
                                "|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
                     []>, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmbk : AVX512BI<opc, MRMSrcMem,
                      (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
                                              _.ScalarMemOp:$src2),
                      !strconcat(OpcodeStr,
                                 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
                                 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
                      []>, EVEX_4V, EVEX_K, EVEX_B,
                      Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr,
                                 X86SchedWriteWidths sched,
                                 AVX512VLVectorVTInfo VTInfo, Predicate prd,
                                 bit IsCommutable = 0> {
  let Predicates = [prd] in
    defm Z : avx512_icmp_packed<opc, OpcodeStr, sched.ZMM,
                                VTInfo.info512, IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_packed<opc, OpcodeStr, sched.YMM,
                                   VTInfo.info256, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_icmp_packed<opc, OpcodeStr, sched.XMM,
                                   VTInfo.info128, IsCommutable>, EVEX_V128;
  }
}

multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
                                     X86SchedWriteWidths sched,
                                     AVX512VLVectorVTInfo VTInfo,
                                     Predicate prd, bit IsCommutable = 0> {
  let Predicates = [prd] in
    defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.ZMM,
                                    VTInfo.info512, IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.YMM,
                                       VTInfo.info256, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.XMM,
                                       VTInfo.info128, IsCommutable>, EVEX_V128;
  }
}

// This fragment treats X86cmpm as commutable to help match loads in both
// operands for PCMPEQ.
def X86setcc_commute : SDNode<"ISD::SETCC", SDTSetCC, [SDNPCommutative]>;
def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2),
                         (setcc node:$src1, node:$src2, SETGT)>;

// AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
// increase the pattern complexity the way an immediate would.
let AddedComplexity = 2 in {
// FIXME: Is there a better scheduler class for VPCMP?
defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb",
                  SchedWriteVecALU, avx512vl_i8_info, HasBWI, 1>,
                EVEX_CD8<8, CD8VF>, VEX_WIG;

defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw",
                  SchedWriteVecALU, avx512vl_i16_info, HasBWI, 1>,
                EVEX_CD8<16, CD8VF>, VEX_WIG;

defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd",
                  SchedWriteVecALU, avx512vl_i32_info, HasAVX512, 1>,
                EVEX_CD8<32, CD8VF>;

defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq",
                  SchedWriteVecALU, avx512vl_i64_info, HasAVX512, 1>,
                T8PD, VEX_W, EVEX_CD8<64, CD8VF>;

defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb",
                  SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
                EVEX_CD8<8, CD8VF>, VEX_WIG;

defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw",
                  SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
                EVEX_CD8<16, CD8VF>, VEX_WIG;

defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd",
                  SchedWriteVecALU, avx512vl_i32_info, HasAVX512>,
                EVEX_CD8<32, CD8VF>;

defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq",
                  SchedWriteVecALU, avx512vl_i64_info, HasAVX512>,
                T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
}

def X86pcmpm_imm : SDNodeXForm<setcc, [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  uint8_t SSECC = X86::getVPCMPImmForCond(CC);
  return getI8Imm(SSECC, SDLoc(N));
}]>;

// Swapped operand version of the above.
def X86pcmpm_imm_commute : SDNodeXForm<setcc, [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  uint8_t SSECC = X86::getVPCMPImmForCond(CC);
  SSECC = X86::getSwappedVPCMPImm(SSECC);
  return getI8Imm(SSECC, SDLoc(N));
}]>;
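
// Illustrative sketch (comment only): getVPCMPImmForCond maps condition codes
// onto the VPCMP immediate space (0=EQ, 1=LT, 2=LE, 4=NE, 5=NLT, 6=NLE), and
// getSwappedVPCMPImm accounts for exchanged operands, so e.g. SETLT becomes
// immediate 1 directly and immediate 6 (NLE, i.e. GT) once the operands are
// commuted to fold a load into the second position.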

multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
                          PatFrag Frag_su,
                          X86FoldableSchedWrite sched,
                          X86VectorVTInfo _, string Name> {
  let isCommutable = 1 in
  def rri : AVX512AIi8<opc, MRMSrcReg,
              (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
              !strconcat("vpcmp", Suffix,
                         "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
              [(set _.KRC:$dst, (_.KVT (Frag:$cc (_.VT _.RC:$src1),
                                                 (_.VT _.RC:$src2),
                                                 cond)))]>,
            EVEX_4V, Sched<[sched]>;
  def rmi : AVX512AIi8<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
              !strconcat("vpcmp", Suffix,
                         "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
              [(set _.KRC:$dst, (_.KVT
                                 (Frag:$cc
                                  (_.VT _.RC:$src1),
                                  (_.VT (_.LdFrag addr:$src2)),
                                  cond)))]>,
            EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  let isCommutable = 1 in
  def rrik : AVX512AIi8<opc, MRMSrcReg,
               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
                                       u8imm:$cc),
               !strconcat("vpcmp", Suffix,
                          "\t{$cc, $src2, $src1, $dst {${mask}}|",
                          "$dst {${mask}}, $src1, $src2, $cc}"),
               [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                      (_.KVT (Frag_su:$cc (_.VT _.RC:$src1),
                                                          (_.VT _.RC:$src2),
                                                          cond))))]>,
             EVEX_4V, EVEX_K, Sched<[sched]>;
  def rmik : AVX512AIi8<opc, MRMSrcMem,
               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
                                       u8imm:$cc),
               !strconcat("vpcmp", Suffix,
                          "\t{$cc, $src2, $src1, $dst {${mask}}|",
                          "$dst {${mask}}, $src1, $src2, $cc}"),
               [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                      (_.KVT
                                       (Frag_su:$cc
                                        (_.VT _.RC:$src1),
                                        (_.VT (_.LdFrag addr:$src2)),
                                        cond))))]>,
             EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;

  def : Pat<(_.KVT (Frag:$cc (_.LdFrag addr:$src2),
                             (_.VT _.RC:$src1), cond)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmi")
             _.RC:$src1, addr:$src2, (X86pcmpm_imm_commute $cc))>;

  def : Pat<(and _.KRCWM:$mask,
                 (_.KVT (Frag_su:$cc (_.LdFrag addr:$src2),
                                     (_.VT _.RC:$src1), cond))),
            (!cast<Instruction>(Name#_.ZSuffix#"rmik")
             _.KRCWM:$mask, _.RC:$src1, addr:$src2,
             (X86pcmpm_imm_commute $cc))>;
}

multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
                              PatFrag Frag_su, X86FoldableSchedWrite sched,
                              X86VectorVTInfo _, string Name> :
           avx512_icmp_cc<opc, Suffix, Frag, Frag_su, sched, _, Name> {
  def rmib : AVX512AIi8<opc, MRMSrcMem,
               (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
                                       u8imm:$cc),
               !strconcat("vpcmp", Suffix,
                          "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
                          "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
               [(set _.KRC:$dst, (_.KVT (Frag:$cc
                                         (_.VT _.RC:$src1),
                                         (_.BroadcastLdFrag addr:$src2),
                                         cond)))]>,
             EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmibk : AVX512AIi8<opc, MRMSrcMem,
                (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
                                        _.ScalarMemOp:$src2, u8imm:$cc),
                !strconcat("vpcmp", Suffix,
                           "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
                           "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
                [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                       (_.KVT (Frag_su:$cc
                                               (_.VT _.RC:$src1),
                                               (_.BroadcastLdFrag addr:$src2),
                                               cond))))]>,
              EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;

  def : Pat<(_.KVT (Frag:$cc (_.BroadcastLdFrag addr:$src2),
                             (_.VT _.RC:$src1), cond)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmib")
             _.RC:$src1, addr:$src2, (X86pcmpm_imm_commute $cc))>;

  def : Pat<(and _.KRCWM:$mask,
                 (_.KVT (Frag_su:$cc (_.BroadcastLdFrag addr:$src2),
                                     (_.VT _.RC:$src1), cond))),
            (!cast<Instruction>(Name#_.ZSuffix#"rmibk")
             _.KRCWM:$mask, _.RC:$src1, addr:$src2,
             (X86pcmpm_imm_commute $cc))>;
}

multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, PatFrag Frag,
                             PatFrag Frag_su, X86SchedWriteWidths sched,
                             AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_icmp_cc<opc, Suffix, Frag, Frag_su,
                            sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su,
                               sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su,
                               sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
  }
}

multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, PatFrag Frag,
                                 PatFrag Frag_su, X86SchedWriteWidths sched,
                                 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su,
                                sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su,
                                   sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su,
                                   sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
  }
}

def X86pcmpm : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                       (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return !ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm>;

def X86pcmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                          (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return N->hasOneUse() && !ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm>;

def X86pcmpum : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                        (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm>;

def X86pcmpum_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                           (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return N->hasOneUse() && ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm>;

// FIXME: Is there a better scheduler class for VPCMP/VPCMPU?
defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86pcmpm, X86pcmpm_su,
                                SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
              EVEX_CD8<8, CD8VF>;
defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86pcmpum, X86pcmpum_su,
                                 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
               EVEX_CD8<8, CD8VF>;

defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86pcmpm, X86pcmpm_su,
                                SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
              VEX_W, EVEX_CD8<16, CD8VF>;
defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86pcmpum, X86pcmpum_su,
                                 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
               VEX_W, EVEX_CD8<16, CD8VF>;

defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86pcmpm, X86pcmpm_su,
                                    SchedWriteVecALU, avx512vl_i32_info,
                                    HasAVX512>, EVEX_CD8<32, CD8VF>;
defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86pcmpum, X86pcmpum_su,
                                     SchedWriteVecALU, avx512vl_i32_info,
                                     HasAVX512>, EVEX_CD8<32, CD8VF>;

defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86pcmpm, X86pcmpm_su,
                                    SchedWriteVecALU, avx512vl_i64_info,
                                    HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86pcmpum, X86pcmpum_su,
                                     SchedWriteVecALU, avx512vl_i64_info,
                                     HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;

def X86cmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                         (X86cmpm node:$src1, node:$src2, node:$cc), [{
  return N->hasOneUse();
}]>;

def X86cmpm_imm_commute : SDNodeXForm<timm, [{
  uint8_t Imm = X86::getSwappedVCMPImm(N->getZExtValue() & 0x1f);
  return getI8Imm(Imm, SDLoc(N));
}]>;
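
// Illustrative sketch (comment only): getSwappedVCMPImm rewrites the FP
// predicate when a pattern commutes a memory operand into the second source:
// "x < y" with x loaded from memory is re-matched as "y > x", so an LT
// immediate is replaced by its GT counterpart, while symmetric predicates
// such as EQ, NEQ, ORD and UNORD map to themselves.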

multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
                              string Name> {
let Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                "vcmp"#_.Suffix,
                "$cc, $src2, $src1", "$src1, $src2, $cc",
                (X86any_cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
                (X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
                1>, Sched<[sched]>;

  defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
                (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
                "vcmp"#_.Suffix,
                "$cc, $src2, $src1", "$src1, $src2, $cc",
                (X86any_cmpm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
                             timm:$cc),
                (X86cmpm_su (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
                            timm:$cc)>,
             Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
                (outs _.KRC:$dst),
                (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
                "vcmp"#_.Suffix,
                "$cc, ${src2}"#_.BroadcastStr#", $src1",
                "$src1, ${src2}"#_.BroadcastStr#", $cc",
                (X86any_cmpm (_.VT _.RC:$src1),
                             (_.VT (_.BroadcastLdFrag addr:$src2)),
                             timm:$cc),
                (X86cmpm_su (_.VT _.RC:$src1),
                            (_.VT (_.BroadcastLdFrag addr:$src2)),
                            timm:$cc)>,
              EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }

  // Patterns for selecting with loads in other operand.
  def : Pat<(X86any_cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1),
                         timm:$cc),
            (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
             (X86cmpm_imm_commute timm:$cc))>;

  def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.LdFrag addr:$src2),
                                            (_.VT _.RC:$src1),
                                            timm:$cc)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
             _.RC:$src1, addr:$src2,
             (X86cmpm_imm_commute timm:$cc))>;

  def : Pat<(X86any_cmpm (_.BroadcastLdFrag addr:$src2),
                         (_.VT _.RC:$src1), timm:$cc),
            (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
             (X86cmpm_imm_commute timm:$cc))>;

  def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.BroadcastLdFrag addr:$src2),
                                            (_.VT _.RC:$src1),
                                            timm:$cc)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
             _.RC:$src1, addr:$src2,
             (X86cmpm_imm_commute timm:$cc))>;

  // Patterns for mask intrinsics.
  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc,
                      (_.KVT immAllOnesV)),
            (!cast<Instruction>(Name#_.ZSuffix#"rri") _.RC:$src1, _.RC:$src2, timm:$cc)>;

  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#"rrik") _.KRCWM:$mask, _.RC:$src1,
             _.RC:$src2, timm:$cc)>;

  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), timm:$cc,
                      (_.KVT immAllOnesV)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2, timm:$cc)>;

  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), timm:$cc,
                      _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask, _.RC:$src1,
             addr:$src2, timm:$cc)>;

  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc,
                      (_.KVT immAllOnesV)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2, timm:$cc)>;

  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc,
                      _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask, _.RC:$src1,
             addr:$src2, timm:$cc)>;

  // Patterns for mask intrinsics with loads in other operand.
  def : Pat<(X86cmpmm (_.VT (_.LdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
                      (_.KVT immAllOnesV)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
             (X86cmpm_imm_commute timm:$cc))>;

  def : Pat<(X86cmpmm (_.VT (_.LdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
                      _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
             _.RC:$src1, addr:$src2,
             (X86cmpm_imm_commute timm:$cc))>;

  def : Pat<(X86cmpmm (_.VT (_.BroadcastLdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
                      (_.KVT immAllOnesV)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
             (X86cmpm_imm_commute timm:$cc))>;

  def : Pat<(X86cmpmm (_.VT (_.BroadcastLdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
                      _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
             _.RC:$src1, addr:$src2,
             (X86cmpm_imm_commute timm:$cc))>;
}

multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  // comparison code form (VCMP[EQ/LT/LE/...])
  let Uses = [MXCSR] in
  defm rrib : AVX512_maskable_custom_cmp<0xC2, MRMSrcReg, (outs _.KRC:$dst),
                (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2, u8imm:$cc),
                "vcmp"#_.Suffix,
                "$cc, {sae}, $src2, $src1",
                "$src1, $src2, {sae}, $cc",
                [(set _.KRC:$dst, (X86cmpmmSAE (_.VT _.RC:$src1),
                                   (_.VT _.RC:$src2), timm:$cc, (_.KVT immAllOnesV)))],
                [(set _.KRC:$dst, (X86cmpmmSAE (_.VT _.RC:$src1),
                                   (_.VT _.RC:$src2), timm:$cc, _.KRCWM:$mask))]>,
              EVEX_B, Sched<[sched]>;
}

multiclass avx512_vcmp<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _,
                       Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in {
    defm Z : avx512_vcmp_common<sched.ZMM, _.info512, NAME>,
             avx512_vcmp_sae<sched.ZMM, _.info512>, EVEX_V512;
  }
  let Predicates = [Pred, HasVLX] in {
    defm Z128 : avx512_vcmp_common<sched.XMM, _.info128, NAME>, EVEX_V128;
    defm Z256 : avx512_vcmp_common<sched.YMM, _.info256, NAME>, EVEX_V256;
  }
}

defm VCMPPD : avx512_vcmp<SchedWriteFCmp, avx512vl_f64_info>,
              AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VCMPPS : avx512_vcmp<SchedWriteFCmp, avx512vl_f32_info>,
              AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
defm VCMPPH : avx512_vcmp<SchedWriteFCmp, avx512vl_f16_info, HasFP16>,
              AVX512PSIi8Base, EVEX_4V, EVEX_CD8<16, CD8VF>, TA;
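
// Illustrative sketch (comment only): the rrib form above adds the EVEX.b
// "suppress all exceptions" payload; mirroring its Intel asm template,
//   vcmpps k1, zmm0, zmm1, {sae}, 0
// performs the comparison without reporting any FP exceptions, independent
// of the MXCSR exception masks.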
let Predicates = [HasAVX512] in {
  def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1, timm:$cc)),
            (VCMPSDZrm FR64X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;

  def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1, timm:$cc)),
            (VCMPSSZrm FR32X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
}

let Predicates = [HasFP16] in {
  def : Pat<(v1i1 (X86cmpms (loadf16 addr:$src2), FR16X:$src1, timm:$cc)),
            (VCMPSHZrm FR16X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
}

// ----------------------------------------------------------------
// FPClass

def X86Vfpclasss_su : PatFrag<(ops node:$src1, node:$src2),
                              (X86Vfpclasss node:$src1, node:$src2), [{
  return N->hasOneUse();
}]>;

def X86Vfpclass_su : PatFrag<(ops node:$src1, node:$src2),
                             (X86Vfpclass node:$src1, node:$src2), [{
  return N->hasOneUse();
}]>;

// Handle the scalar fpclass instruction: mask = fpclass(reg_scalar, imm)
//                                               fpclass(mem_scalar, imm)
multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                 Predicate prd> {
  let Predicates = [prd], ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
    def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                    (ins _.RC:$src1, i32u8imm:$src2),
                    OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.KRC:$dst,(X86Vfpclasss (_.VT _.RC:$src1),
                                     (i32 timm:$src2)))]>,
                    Sched<[sched]>;
    def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                     (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
                     OpcodeStr#_.Suffix#
                     "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                     [(set _.KRC:$dst,(and _.KRCWM:$mask,
                                      (X86Vfpclasss_su (_.VT _.RC:$src1),
                                      (i32 timm:$src2))))]>,
                     EVEX_K, Sched<[sched]>;
    def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.IntScalarMemOp:$src1, i32u8imm:$src2),
                    OpcodeStr#_.Suffix#
                    "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.KRC:$dst,
                          (X86Vfpclasss (_.ScalarIntMemFrags addr:$src1),
                                        (i32 timm:$src2)))]>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
    def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                     (ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2),
                     OpcodeStr#_.Suffix#
                     "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                     [(set _.KRC:$dst,(and _.KRCWM:$mask,
                                      (X86Vfpclasss_su (_.ScalarIntMemFrags addr:$src1),
                                      (i32 timm:$src2))))]>,
                     EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Handle the vector fpclass instruction: mask = fpclass(reg_vec, imm)
//                                               fpclass(mem_vec, imm)
//                                               fpclass(broadcast(eltVT), imm)
multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                 string mem>{
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
    def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                    (ins _.RC:$src1, i32u8imm:$src2),
                    OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.KRC:$dst,(X86Vfpclass (_.VT _.RC:$src1),
                                     (i32 timm:$src2)))]>,
                    Sched<[sched]>;
    def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                     (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
                     OpcodeStr#_.Suffix#
                     "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                     [(set _.KRC:$dst,(and _.KRCWM:$mask,
                                      (X86Vfpclass_su (_.VT _.RC:$src1),
                                      (i32 timm:$src2))))]>,
2751 EVEX_K, Sched<[sched]>; 2752 def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst), 2753 (ins _.MemOp:$src1, i32u8imm:$src2), 2754 OpcodeStr#_.Suffix#"{"#mem#"}"# 2755 "\t{$src2, $src1, $dst|$dst, $src1, $src2}", 2756 [(set _.KRC:$dst,(X86Vfpclass 2757 (_.VT (_.LdFrag addr:$src1)), 2758 (i32 timm:$src2)))]>, 2759 Sched<[sched.Folded, sched.ReadAfterFold]>; 2760 def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst), 2761 (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2), 2762 OpcodeStr#_.Suffix#"{"#mem#"}"# 2763 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", 2764 [(set _.KRC:$dst, (and _.KRCWM:$mask, (X86Vfpclass_su 2765 (_.VT (_.LdFrag addr:$src1)), 2766 (i32 timm:$src2))))]>, 2767 EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>; 2768 def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst), 2769 (ins _.ScalarMemOp:$src1, i32u8imm:$src2), 2770 OpcodeStr#_.Suffix#"\t{$src2, ${src1}"# 2771 _.BroadcastStr#", $dst|$dst, ${src1}" 2772 #_.BroadcastStr#", $src2}", 2773 [(set _.KRC:$dst,(X86Vfpclass 2774 (_.VT (_.BroadcastLdFrag addr:$src1)), 2775 (i32 timm:$src2)))]>, 2776 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 2777 def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst), 2778 (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2), 2779 OpcodeStr#_.Suffix#"\t{$src2, ${src1}"# 2780 _.BroadcastStr#", $dst {${mask}}|$dst {${mask}}, ${src1}"# 2781 _.BroadcastStr#", $src2}", 2782 [(set _.KRC:$dst,(and _.KRCWM:$mask, (X86Vfpclass_su 2783 (_.VT (_.BroadcastLdFrag addr:$src1)), 2784 (i32 timm:$src2))))]>, 2785 EVEX_B, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>; 2786 } 2787 2788 // Allow registers or broadcast with the x, y, z suffix we use to disambiguate 2789 // the memory form. 2790 def : InstAlias<OpcodeStr#_.Suffix#mem# 2791 "\t{$src2, $src1, $dst|$dst, $src1, $src2}", 2792 (!cast<Instruction>(NAME#"rr") 2793 _.KRC:$dst, _.RC:$src1, i32u8imm:$src2), 0, "att">; 2794 def : InstAlias<OpcodeStr#_.Suffix#mem# 2795 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", 2796 (!cast<Instruction>(NAME#"rrk") 2797 _.KRC:$dst, _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2), 0, "att">; 2798 def : InstAlias<OpcodeStr#_.Suffix#mem# 2799 "\t{$src2, ${src1}"#_.BroadcastStr#", $dst|$dst, ${src1}"# 2800 _.BroadcastStr#", $src2}", 2801 (!cast<Instruction>(NAME#"rmb") 2802 _.KRC:$dst, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">; 2803 def : InstAlias<OpcodeStr#_.Suffix#mem# 2804 "\t{$src2, ${src1}"#_.BroadcastStr#", $dst {${mask}}|" 2805 "$dst {${mask}}, ${src1}"#_.BroadcastStr#", $src2}", 2806 (!cast<Instruction>(NAME#"rmbk") 2807 _.KRC:$dst, _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">; 2808} 2809 2810multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _, 2811 bits<8> opc, X86SchedWriteWidths sched, 2812 Predicate prd>{ 2813 let Predicates = [prd] in { 2814 defm Z : avx512_vector_fpclass<opc, OpcodeStr, sched.ZMM, 2815 _.info512, "z">, EVEX_V512; 2816 } 2817 let Predicates = [prd, HasVLX] in { 2818 defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, sched.XMM, 2819 _.info128, "x">, EVEX_V128; 2820 defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, sched.YMM, 2821 _.info256, "y">, EVEX_V256; 2822 } 2823} 2824 2825multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec, 2826 bits<8> opcScalar, X86SchedWriteWidths sched> { 2827 defm PH : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f16_info, opcVec, 2828 sched, HasFP16>, 2829 EVEX_CD8<16, CD8VF>, AVX512PSIi8Base, TA; 2830 defm SHZ : avx512_scalar_fpclass<opcScalar, OpcodeStr, 2831 
sched.Scl, f16x_info, HasFP16>, 2832 EVEX_CD8<16, CD8VT1>, AVX512PSIi8Base, TA; 2833 defm PS : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f32_info, opcVec, 2834 sched, HasDQI>, 2835 EVEX_CD8<32, CD8VF>, AVX512AIi8Base; 2836 defm PD : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f64_info, opcVec, 2837 sched, HasDQI>, 2838 EVEX_CD8<64, CD8VF>, AVX512AIi8Base, VEX_W; 2839 defm SSZ : avx512_scalar_fpclass<opcScalar, OpcodeStr, 2840 sched.Scl, f32x_info, HasDQI>, VEX_LIG, 2841 EVEX_CD8<32, CD8VT1>, AVX512AIi8Base; 2842 defm SDZ : avx512_scalar_fpclass<opcScalar, OpcodeStr, 2843 sched.Scl, f64x_info, HasDQI>, VEX_LIG, 2844 EVEX_CD8<64, CD8VT1>, AVX512AIi8Base, VEX_W; 2845} 2846 2847defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, SchedWriteFCmp>, EVEX; 2848 2849//----------------------------------------------------------------- 2850// Mask register copy, including 2851// - copy between mask registers 2852// - load/store mask registers 2853// - copy from GPR to mask register and vice versa 2854// 2855multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk, 2856 string OpcodeStr, RegisterClass KRC, 2857 ValueType vvt, X86MemOperand x86memop> { 2858 let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove] in 2859 def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src), 2860 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>, 2861 Sched<[WriteMove]>; 2862 def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src), 2863 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 2864 [(set KRC:$dst, (vvt (load addr:$src)))]>, 2865 Sched<[WriteLoad]>; 2866 def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src), 2867 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 2868 [(store KRC:$src, addr:$dst)]>, 2869 Sched<[WriteStore]>; 2870} 2871 2872multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk, 2873 string OpcodeStr, 2874 RegisterClass KRC, RegisterClass GRC> { 2875 let hasSideEffects = 0 in { 2876 def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src), 2877 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>, 2878 Sched<[WriteMove]>; 2879 def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src), 2880 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>, 2881 Sched<[WriteMove]>; 2882 } 2883} 2884 2885let Predicates = [HasDQI] in 2886 defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>, 2887 avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>, 2888 VEX, PD; 2889 2890let Predicates = [HasAVX512] in 2891 defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>, 2892 avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>, 2893 VEX, PS; 2894 2895let Predicates = [HasBWI] in { 2896 defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem>, 2897 VEX, PD, VEX_W; 2898 defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>, 2899 VEX, XD; 2900 defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>, 2901 VEX, PS, VEX_W; 2902 defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>, 2903 VEX, XD, VEX_W; 2904} 2905 2906// GR from/to mask register 2907def : Pat<(v16i1 (bitconvert (i16 GR16:$src))), 2908 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>; 2909def : Pat<(i16 (bitconvert (v16i1 VK16:$src))), 2910 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>; 2911def : Pat<(i8 (trunc (i16 (bitconvert (v16i1 VK16:$src))))), 2912 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS 
VK16:$src, GR32)), sub_8bit)>; 2913 2914def : Pat<(v8i1 (bitconvert (i8 GR8:$src))), 2915 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>; 2916def : Pat<(i8 (bitconvert (v8i1 VK8:$src))), 2917 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit)>; 2918 2919def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))), 2920 (KMOVWrk VK16:$src)>; 2921def : Pat<(i64 (zext (i16 (bitconvert (v16i1 VK16:$src))))), 2922 (SUBREG_TO_REG (i64 0), (KMOVWrk VK16:$src), sub_32bit)>; 2923def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))), 2924 (COPY_TO_REGCLASS VK16:$src, GR32)>; 2925def : Pat<(i64 (anyext (i16 (bitconvert (v16i1 VK16:$src))))), 2926 (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK16:$src, GR32), sub_32bit)>; 2927 2928def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))), 2929 (KMOVBrk VK8:$src)>, Requires<[HasDQI]>; 2930def : Pat<(i64 (zext (i8 (bitconvert (v8i1 VK8:$src))))), 2931 (SUBREG_TO_REG (i64 0), (KMOVBrk VK8:$src), sub_32bit)>, Requires<[HasDQI]>; 2932def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))), 2933 (COPY_TO_REGCLASS VK8:$src, GR32)>; 2934def : Pat<(i64 (anyext (i8 (bitconvert (v8i1 VK8:$src))))), 2935 (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK8:$src, GR32), sub_32bit)>; 2936 2937def : Pat<(v32i1 (bitconvert (i32 GR32:$src))), 2938 (COPY_TO_REGCLASS GR32:$src, VK32)>; 2939def : Pat<(i32 (bitconvert (v32i1 VK32:$src))), 2940 (COPY_TO_REGCLASS VK32:$src, GR32)>; 2941def : Pat<(v64i1 (bitconvert (i64 GR64:$src))), 2942 (COPY_TO_REGCLASS GR64:$src, VK64)>; 2943def : Pat<(i64 (bitconvert (v64i1 VK64:$src))), 2944 (COPY_TO_REGCLASS VK64:$src, GR64)>; 2945 2946// Load/store kreg 2947let Predicates = [HasDQI] in { 2948 def : Pat<(v1i1 (load addr:$src)), 2949 (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>; 2950 def : Pat<(v2i1 (load addr:$src)), 2951 (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>; 2952 def : Pat<(v4i1 (load addr:$src)), 2953 (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>; 2954} 2955 2956let Predicates = [HasAVX512] in { 2957 def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))), 2958 (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>; 2959 def : Pat<(v16i1 (bitconvert (loadi16 addr:$src))), 2960 (KMOVWkm addr:$src)>; 2961} 2962 2963def X86kextract : SDNode<"ISD::EXTRACT_VECTOR_ELT", 2964 SDTypeProfile<1, 2, [SDTCisVT<0, i8>, 2965 SDTCVecEltisVT<1, i1>, 2966 SDTCisPtrTy<2>]>>; 2967 2968let Predicates = [HasAVX512] in { 2969 multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC, ValueType maskVT> { 2970 def : Pat<(maskVT (scalar_to_vector GR32:$src)), 2971 (COPY_TO_REGCLASS GR32:$src, maskRC)>; 2972 2973 def : Pat<(maskVT (scalar_to_vector GR8:$src)), 2974 (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>; 2975 2976 def : Pat<(i8 (X86kextract maskRC:$src, (iPTR 0))), 2977 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS maskRC:$src, GR32)), sub_8bit)>; 2978 2979 def : Pat<(i32 (anyext (i8 (X86kextract maskRC:$src, (iPTR 0))))), 2980 (i32 (COPY_TO_REGCLASS maskRC:$src, GR32))>; 2981 } 2982 2983 defm : operation_gpr_mask_copy_lowering<VK1, v1i1>; 2984 defm : operation_gpr_mask_copy_lowering<VK2, v2i1>; 2985 defm : operation_gpr_mask_copy_lowering<VK4, v4i1>; 2986 defm : operation_gpr_mask_copy_lowering<VK8, v8i1>; 2987 defm : operation_gpr_mask_copy_lowering<VK16, v16i1>; 2988 defm : operation_gpr_mask_copy_lowering<VK32, v32i1>; 2989 defm : operation_gpr_mask_copy_lowering<VK64, v64i1>; 2990 2991 def : Pat<(insert_subvector (v16i1 immAllZerosV), 2992 (v1i1 
                                        (scalar_to_vector GR8:$src)), (iPTR 0)),
            (KMOVWkr (AND32ri8
                      (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
                      (i32 1)))>;
}

// Mask unary operation
// - KNOT
multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
                            RegisterClass KRC, SDPatternOperator OpNode,
                            X86FoldableSchedWrite sched, Predicate prd> {
  let Predicates = [prd] in
    def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
               [(set KRC:$dst, (OpNode KRC:$src))]>,
               Sched<[sched]>;
}

multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
                                SDPatternOperator OpNode,
                                X86FoldableSchedWrite sched> {
  defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
                            sched, HasDQI>, VEX, PD;
  defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
                            sched, HasAVX512>, VEX, PS;
  defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
                            sched, HasBWI>, VEX, PD, VEX_W;
  defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
                            sched, HasBWI>, VEX, PS, VEX_W;
}

// TODO - do we need a X86SchedWriteWidths::KMASK type?
defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SchedWriteVecLogic.XMM>;

// KNL does not support KMOVB, 8-bit mask is promoted to 16-bit
let Predicates = [HasAVX512, NoDQI] in
def : Pat<(vnot VK8:$src),
          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;

def : Pat<(vnot VK4:$src),
          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>;
def : Pat<(vnot VK2:$src),
          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>;
def : Pat<(vnot VK1:$src),
          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK1:$src, VK16)), VK1)>;

// Mask binary operation
// - KAND, KANDN, KOR, KXNOR, KXOR
multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
                             RegisterClass KRC, SDPatternOperator OpNode,
                             X86FoldableSchedWrite sched, Predicate prd,
                             bit IsCommutable> {
  let Predicates = [prd], isCommutable = IsCommutable in
    def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
               !strconcat(OpcodeStr,
                          "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
               [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>,
               Sched<[sched]>;
}

multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
                                 SDPatternOperator OpNode,
                                 X86FoldableSchedWrite sched, bit IsCommutable,
                                 Predicate prdW = HasAVX512> {
  defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
                             sched, HasDQI, IsCommutable>, VEX_4V, VEX_L, PD;
  defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
                             sched, prdW, IsCommutable>, VEX_4V, VEX_L, PS;
  defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
                             sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PD;
  defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
                             sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PS;
}

// These nodes use 'vnot' instead of 'not' to support vectors.
def vandn : PatFrag<(ops node:$i0, node:$i1), (and (vnot node:$i0), node:$i1)>;
def vxnor : PatFrag<(ops node:$i0, node:$i1), (vnot (xor node:$i0, node:$i1))>;

// TODO - do we need a X86SchedWriteWidths::KMASK type?
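// For reference, each defm below expands to four K-register forms, e.g.
// KAND -> KANDBrr/KANDWrr/KANDDrr/KANDQrr, predicated on HasDQI (byte),
// prdW = HasAVX512 (word) and HasBWI (dword/qword); only the word form is
// available in the baseline AVX512F feature set.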
3071defm KAND : avx512_mask_binop_all<0x41, "kand", and, SchedWriteVecLogic.XMM, 1>; 3072defm KOR : avx512_mask_binop_all<0x45, "kor", or, SchedWriteVecLogic.XMM, 1>; 3073defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor, SchedWriteVecLogic.XMM, 1>; 3074defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor, SchedWriteVecLogic.XMM, 1>; 3075defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn, SchedWriteVecLogic.XMM, 0>; 3076defm KADD : avx512_mask_binop_all<0x4A, "kadd", X86kadd, SchedWriteVecLogic.XMM, 1, HasDQI>; 3077 3078multiclass avx512_binop_pat<SDPatternOperator VOpNode, 3079 Instruction Inst> { 3080 // With AVX512F, 8-bit mask is promoted to 16-bit mask, 3081 // for the DQI set, this type is legal and KxxxB instruction is used 3082 let Predicates = [NoDQI] in 3083 def : Pat<(VOpNode VK8:$src1, VK8:$src2), 3084 (COPY_TO_REGCLASS 3085 (Inst (COPY_TO_REGCLASS VK8:$src1, VK16), 3086 (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>; 3087 3088 // All types smaller than 8 bits require conversion anyway 3089 def : Pat<(VOpNode VK1:$src1, VK1:$src2), 3090 (COPY_TO_REGCLASS (Inst 3091 (COPY_TO_REGCLASS VK1:$src1, VK16), 3092 (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>; 3093 def : Pat<(VOpNode VK2:$src1, VK2:$src2), 3094 (COPY_TO_REGCLASS (Inst 3095 (COPY_TO_REGCLASS VK2:$src1, VK16), 3096 (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>; 3097 def : Pat<(VOpNode VK4:$src1, VK4:$src2), 3098 (COPY_TO_REGCLASS (Inst 3099 (COPY_TO_REGCLASS VK4:$src1, VK16), 3100 (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>; 3101} 3102 3103defm : avx512_binop_pat<and, KANDWrr>; 3104defm : avx512_binop_pat<vandn, KANDNWrr>; 3105defm : avx512_binop_pat<or, KORWrr>; 3106defm : avx512_binop_pat<vxnor, KXNORWrr>; 3107defm : avx512_binop_pat<xor, KXORWrr>; 3108 3109// Mask unpacking 3110multiclass avx512_mask_unpck<string Suffix, X86KVectorVTInfo Dst, 3111 X86KVectorVTInfo Src, X86FoldableSchedWrite sched, 3112 Predicate prd> { 3113 let Predicates = [prd] in { 3114 let hasSideEffects = 0 in 3115 def rr : I<0x4b, MRMSrcReg, (outs Dst.KRC:$dst), 3116 (ins Src.KRC:$src1, Src.KRC:$src2), 3117 "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, 3118 VEX_4V, VEX_L, Sched<[sched]>; 3119 3120 def : Pat<(Dst.KVT (concat_vectors Src.KRC:$src1, Src.KRC:$src2)), 3121 (!cast<Instruction>(NAME#rr) Src.KRC:$src2, Src.KRC:$src1)>; 3122 } 3123} 3124 3125defm KUNPCKBW : avx512_mask_unpck<"bw", v16i1_info, v8i1_info, WriteShuffle, HasAVX512>, PD; 3126defm KUNPCKWD : avx512_mask_unpck<"wd", v32i1_info, v16i1_info, WriteShuffle, HasBWI>, PS; 3127defm KUNPCKDQ : avx512_mask_unpck<"dq", v64i1_info, v32i1_info, WriteShuffle, HasBWI>, PS, VEX_W; 3128 3129// Mask bit testing 3130multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC, 3131 SDNode OpNode, X86FoldableSchedWrite sched, 3132 Predicate prd> { 3133 let Predicates = [prd], Defs = [EFLAGS] in 3134 def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2), 3135 !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), 3136 [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>, 3137 Sched<[sched]>; 3138} 3139 3140multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode, 3141 X86FoldableSchedWrite sched, 3142 Predicate prdW = HasAVX512> { 3143 defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, sched, HasDQI>, 3144 VEX, PD; 3145 defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, sched, prdW>, 3146 VEX, PS; 3147 defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, sched, HasBWI>, 3148 VEX, PS, VEX_W; 3149 defm D : 
avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, sched, HasBWI>, 3150 VEX, PD, VEX_W; 3151} 3152 3153// TODO - do we need a X86SchedWriteWidths::KMASK type? 3154defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SchedWriteVecLogic.XMM>; 3155defm KTEST : avx512_mask_testop_w<0x99, "ktest", X86ktest, SchedWriteVecLogic.XMM, HasDQI>; 3156 3157// Mask shift 3158multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC, 3159 SDNode OpNode, X86FoldableSchedWrite sched> { 3160 let Predicates = [HasAVX512] in 3161 def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm), 3162 !strconcat(OpcodeStr, 3163 "\t{$imm, $src, $dst|$dst, $src, $imm}"), 3164 [(set KRC:$dst, (OpNode KRC:$src, (i8 timm:$imm)))]>, 3165 Sched<[sched]>; 3166} 3167 3168multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr, 3169 SDNode OpNode, X86FoldableSchedWrite sched> { 3170 defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode, 3171 sched>, VEX, TAPD, VEX_W; 3172 let Predicates = [HasDQI] in 3173 defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode, 3174 sched>, VEX, TAPD; 3175 let Predicates = [HasBWI] in { 3176 defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode, 3177 sched>, VEX, TAPD, VEX_W; 3178 defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode, 3179 sched>, VEX, TAPD; 3180 } 3181} 3182 3183defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, WriteShuffle>; 3184defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, WriteShuffle>; 3185 3186// Patterns for comparing 128/256-bit integer vectors using 512-bit instruction. 3187multiclass axv512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su, 3188 string InstStr, 3189 X86VectorVTInfo Narrow, 3190 X86VectorVTInfo Wide> { 3191def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1), 3192 (Narrow.VT Narrow.RC:$src2), cond)), 3193 (COPY_TO_REGCLASS 3194 (!cast<Instruction>(InstStr#"Zrri") 3195 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), 3196 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)), 3197 (X86pcmpm_imm $cc)), Narrow.KRC)>; 3198 3199def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, 3200 (Narrow.KVT (Frag_su:$cc (Narrow.VT Narrow.RC:$src1), 3201 (Narrow.VT Narrow.RC:$src2), 3202 cond)))), 3203 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik") 3204 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), 3205 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), 3206 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)), 3207 (X86pcmpm_imm $cc)), Narrow.KRC)>; 3208} 3209 3210multiclass axv512_icmp_packed_cc_rmb_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su, 3211 string InstStr, 3212 X86VectorVTInfo Narrow, 3213 X86VectorVTInfo Wide> { 3214// Broadcast load. 
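// (An embedded-broadcast compare tests every element of $src1 against one
// scalar loaded from memory - the "{1toN}" assembly form. Lacking VLX, the
// narrow register operand is widened with INSERT_SUBREG into an
// IMPLICIT_DEF, the 512-bit Zrmib/Zrmibk form does the compare, and the
// mask result is copied back to the narrow mask register class.)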
3215def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1), 3216 (Narrow.BroadcastLdFrag addr:$src2), cond)), 3217 (COPY_TO_REGCLASS 3218 (!cast<Instruction>(InstStr#"Zrmib") 3219 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), 3220 addr:$src2, (X86pcmpm_imm $cc)), Narrow.KRC)>; 3221 3222def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, 3223 (Narrow.KVT 3224 (Frag_su:$cc (Narrow.VT Narrow.RC:$src1), 3225 (Narrow.BroadcastLdFrag addr:$src2), 3226 cond)))), 3227 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk") 3228 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), 3229 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), 3230 addr:$src2, (X86pcmpm_imm $cc)), Narrow.KRC)>; 3231 3232// Commuted with broadcast load. 3233def : Pat<(Narrow.KVT (Frag:$cc (Narrow.BroadcastLdFrag addr:$src2), 3234 (Narrow.VT Narrow.RC:$src1), 3235 cond)), 3236 (COPY_TO_REGCLASS 3237 (!cast<Instruction>(InstStr#"Zrmib") 3238 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), 3239 addr:$src2, (X86pcmpm_imm_commute $cc)), Narrow.KRC)>; 3240 3241def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, 3242 (Narrow.KVT 3243 (Frag_su:$cc (Narrow.BroadcastLdFrag addr:$src2), 3244 (Narrow.VT Narrow.RC:$src1), 3245 cond)))), 3246 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk") 3247 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), 3248 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), 3249 addr:$src2, (X86pcmpm_imm_commute $cc)), Narrow.KRC)>; 3250} 3251 3252// Same as above, but for fp types which don't use PatFrags. 3253multiclass axv512_cmp_packed_cc_no_vlx_lowering<string InstStr, 3254 X86VectorVTInfo Narrow, 3255 X86VectorVTInfo Wide> { 3256def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1), 3257 (Narrow.VT Narrow.RC:$src2), timm:$cc)), 3258 (COPY_TO_REGCLASS 3259 (!cast<Instruction>(InstStr#"Zrri") 3260 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), 3261 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)), 3262 timm:$cc), Narrow.KRC)>; 3263 3264def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, 3265 (X86cmpm_su (Narrow.VT Narrow.RC:$src1), 3266 (Narrow.VT Narrow.RC:$src2), timm:$cc))), 3267 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik") 3268 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), 3269 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), 3270 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)), 3271 timm:$cc), Narrow.KRC)>; 3272 3273// Broadcast load. 3274def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1), 3275 (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc)), 3276 (COPY_TO_REGCLASS 3277 (!cast<Instruction>(InstStr#"Zrmbi") 3278 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), 3279 addr:$src2, timm:$cc), Narrow.KRC)>; 3280 3281def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, 3282 (X86cmpm_su (Narrow.VT Narrow.RC:$src1), 3283 (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc))), 3284 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik") 3285 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), 3286 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), 3287 addr:$src2, timm:$cc), Narrow.KRC)>; 3288 3289// Commuted with broadcast load. 
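// (The broadcast operand has to stay in the foldable memory slot, so the
// commute happens entirely in the immediate: X86cmpm_imm_commute rewrites
// the predicate rather than physically swapping the operands.)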
3290def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), 3291 (Narrow.VT Narrow.RC:$src1), timm:$cc)), 3292 (COPY_TO_REGCLASS 3293 (!cast<Instruction>(InstStr#"Zrmbi") 3294 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), 3295 addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>; 3296 3297def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, 3298 (X86cmpm_su (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), 3299 (Narrow.VT Narrow.RC:$src1), timm:$cc))), 3300 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik") 3301 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), 3302 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), 3303 addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>; 3304} 3305 3306let Predicates = [HasAVX512, NoVLX] in { 3307 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>; 3308 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>; 3309 3310 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>; 3311 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>; 3312 3313 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>; 3314 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>; 3315 3316 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>; 3317 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>; 3318 3319 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>; 3320 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>; 3321 3322 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>; 3323 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>; 3324 3325 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>; 3326 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>; 3327 3328 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>; 3329 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>; 3330 3331 defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v8f32x_info, v16f32_info>; 3332 defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v4f32x_info, v16f32_info>; 3333 defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v4f64x_info, v8f64_info>; 3334 defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v2f64x_info, v8f64_info>; 3335} 3336 3337let Predicates = [HasBWI, NoVLX] in { 3338 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v32i8x_info, v64i8_info>; 3339 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v32i8x_info, v64i8_info>; 3340 3341 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v16i8x_info, v64i8_info>; 3342 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v16i8x_info, v64i8_info>; 3343 3344 defm : 
axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v16i16x_info, v32i16_info>; 3345 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v16i16x_info, v32i16_info>; 3346 3347 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v8i16x_info, v32i16_info>; 3348 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v8i16x_info, v32i16_info>; 3349} 3350 3351// Mask setting all 0s or 1s 3352multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, SDPatternOperator Val> { 3353 let Predicates = [HasAVX512] in 3354 let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1, 3355 SchedRW = [WriteZero] in 3356 def NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "", 3357 [(set KRC:$dst, (VT Val))]>; 3358} 3359 3360multiclass avx512_mask_setop_w<SDPatternOperator Val> { 3361 defm W : avx512_mask_setop<VK16, v16i1, Val>; 3362 defm D : avx512_mask_setop<VK32, v32i1, Val>; 3363 defm Q : avx512_mask_setop<VK64, v64i1, Val>; 3364} 3365 3366defm KSET0 : avx512_mask_setop_w<immAllZerosV>; 3367defm KSET1 : avx512_mask_setop_w<immAllOnesV>; 3368 3369// With AVX-512 only, 8-bit mask is promoted to 16-bit mask. 3370let Predicates = [HasAVX512] in { 3371 def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>; 3372 def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>; 3373 def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>; 3374 def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>; 3375 def : Pat<(v8i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK8)>; 3376 def : Pat<(v4i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK4)>; 3377 def : Pat<(v2i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK2)>; 3378 def : Pat<(v1i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK1)>; 3379} 3380 3381// Patterns for kmask insert_subvector/extract_subvector to/from index=0 3382multiclass operation_subvector_mask_lowering<RegisterClass subRC, ValueType subVT, 3383 RegisterClass RC, ValueType VT> { 3384 def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))), 3385 (subVT (COPY_TO_REGCLASS RC:$src, subRC))>; 3386 3387 def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))), 3388 (VT (COPY_TO_REGCLASS subRC:$src, RC))>; 3389} 3390defm : operation_subvector_mask_lowering<VK1, v1i1, VK2, v2i1>; 3391defm : operation_subvector_mask_lowering<VK1, v1i1, VK4, v4i1>; 3392defm : operation_subvector_mask_lowering<VK1, v1i1, VK8, v8i1>; 3393defm : operation_subvector_mask_lowering<VK1, v1i1, VK16, v16i1>; 3394defm : operation_subvector_mask_lowering<VK1, v1i1, VK32, v32i1>; 3395defm : operation_subvector_mask_lowering<VK1, v1i1, VK64, v64i1>; 3396 3397defm : operation_subvector_mask_lowering<VK2, v2i1, VK4, v4i1>; 3398defm : operation_subvector_mask_lowering<VK2, v2i1, VK8, v8i1>; 3399defm : operation_subvector_mask_lowering<VK2, v2i1, VK16, v16i1>; 3400defm : operation_subvector_mask_lowering<VK2, v2i1, VK32, v32i1>; 3401defm : operation_subvector_mask_lowering<VK2, v2i1, VK64, v64i1>; 3402 3403defm : operation_subvector_mask_lowering<VK4, v4i1, VK8, v8i1>; 3404defm : operation_subvector_mask_lowering<VK4, v4i1, VK16, v16i1>; 3405defm : operation_subvector_mask_lowering<VK4, v4i1, VK32, v32i1>; 3406defm : operation_subvector_mask_lowering<VK4, v4i1, VK64, v64i1>; 3407 3408defm : operation_subvector_mask_lowering<VK8, v8i1, VK16, v16i1>; 3409defm : operation_subvector_mask_lowering<VK8, v8i1, VK32, v32i1>; 3410defm : operation_subvector_mask_lowering<VK8, v8i1, VK64, v64i1>; 3411 3412defm : 
operation_subvector_mask_lowering<VK16, v16i1, VK32, v32i1>; 3413defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>; 3414 3415defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>; 3416 3417//===----------------------------------------------------------------------===// 3418// AVX-512 - Aligned and unaligned load and store 3419// 3420 3421multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name, 3422 X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload, 3423 X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd, 3424 bit NoRMPattern = 0, 3425 SDPatternOperator SelectOprr = vselect> { 3426 let hasSideEffects = 0 in { 3427 let isMoveReg = 1 in 3428 def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src), 3429 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [], 3430 _.ExeDomain>, EVEX, Sched<[Sched.RR]>, 3431 EVEX2VEXOverride<EVEX2VEXOvrd#"rr">; 3432 def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), 3433 (ins _.KRCWM:$mask, _.RC:$src), 3434 !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|", 3435 "${dst} {${mask}} {z}, $src}"), 3436 [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask, 3437 (_.VT _.RC:$src), 3438 _.ImmAllZerosV)))], _.ExeDomain>, 3439 EVEX, EVEX_KZ, Sched<[Sched.RR]>; 3440 3441 let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1 in 3442 def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src), 3443 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 3444 !if(NoRMPattern, [], 3445 [(set _.RC:$dst, 3446 (_.VT (ld_frag addr:$src)))]), 3447 _.ExeDomain>, EVEX, Sched<[Sched.RM]>, 3448 EVEX2VEXOverride<EVEX2VEXOvrd#"rm">; 3449 3450 let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in { 3451 def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), 3452 (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1), 3453 !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|", 3454 "${dst} {${mask}}, $src1}"), 3455 [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask, 3456 (_.VT _.RC:$src1), 3457 (_.VT _.RC:$src0))))], _.ExeDomain>, 3458 EVEX, EVEX_K, Sched<[Sched.RR]>; 3459 def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), 3460 (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1), 3461 !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|", 3462 "${dst} {${mask}}, $src1}"), 3463 [(set _.RC:$dst, (_.VT 3464 (vselect_mask _.KRCWM:$mask, 3465 (_.VT (ld_frag addr:$src1)), 3466 (_.VT _.RC:$src0))))], _.ExeDomain>, 3467 EVEX, EVEX_K, Sched<[Sched.RM]>; 3468 } 3469 def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), 3470 (ins _.KRCWM:$mask, _.MemOp:$src), 3471 OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"# 3472 "${dst} {${mask}} {z}, $src}", 3473 [(set _.RC:$dst, (_.VT (vselect_mask _.KRCWM:$mask, 3474 (_.VT (ld_frag addr:$src)), _.ImmAllZerosV)))], 3475 _.ExeDomain>, EVEX, EVEX_KZ, Sched<[Sched.RM]>; 3476 } 3477 def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)), 3478 (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>; 3479 3480 def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)), 3481 (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>; 3482 3483 def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))), 3484 (!cast<Instruction>(Name#_.ZSuffix#rmk) _.RC:$src0, 3485 _.KRCWM:$mask, addr:$ptr)>; 3486} 3487 3488multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr, 3489 AVX512VLVectorVTInfo _, Predicate prd, 3490 X86SchedWriteMoveLSWidths Sched, 3491 string EVEX2VEXOvrd, bit NoRMPattern = 0> { 3492 let Predicates = [prd] in 3493 defm Z : avx512_load<opc, OpcodeStr, NAME, 
_.info512, 3494 _.info512.AlignedLdFrag, masked_load_aligned, 3495 Sched.ZMM, "", NoRMPattern>, EVEX_V512; 3496 3497 let Predicates = [prd, HasVLX] in { 3498 defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256, 3499 _.info256.AlignedLdFrag, masked_load_aligned, 3500 Sched.YMM, EVEX2VEXOvrd#"Y", NoRMPattern>, EVEX_V256; 3501 defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128, 3502 _.info128.AlignedLdFrag, masked_load_aligned, 3503 Sched.XMM, EVEX2VEXOvrd, NoRMPattern>, EVEX_V128; 3504 } 3505} 3506 3507multiclass avx512_load_vl<bits<8> opc, string OpcodeStr, 3508 AVX512VLVectorVTInfo _, Predicate prd, 3509 X86SchedWriteMoveLSWidths Sched, 3510 string EVEX2VEXOvrd, bit NoRMPattern = 0, 3511 SDPatternOperator SelectOprr = vselect> { 3512 let Predicates = [prd] in 3513 defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512, _.info512.LdFrag, 3514 masked_load, Sched.ZMM, "", 3515 NoRMPattern, SelectOprr>, EVEX_V512; 3516 3517 let Predicates = [prd, HasVLX] in { 3518 defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256, _.info256.LdFrag, 3519 masked_load, Sched.YMM, EVEX2VEXOvrd#"Y", 3520 NoRMPattern, SelectOprr>, EVEX_V256; 3521 defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128, _.info128.LdFrag, 3522 masked_load, Sched.XMM, EVEX2VEXOvrd, 3523 NoRMPattern, SelectOprr>, EVEX_V128; 3524 } 3525} 3526 3527multiclass avx512_store<bits<8> opc, string OpcodeStr, string BaseName, 3528 X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore, 3529 X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd, 3530 bit NoMRPattern = 0> { 3531 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in { 3532 let isMoveReg = 1 in 3533 def rr_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src), 3534 OpcodeStr # "\t{$src, $dst|$dst, $src}", 3535 [], _.ExeDomain>, EVEX, 3536 FoldGenData<BaseName#_.ZSuffix#rr>, Sched<[Sched.RR]>, 3537 EVEX2VEXOverride<EVEX2VEXOvrd#"rr_REV">; 3538 def rrk_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), 3539 (ins _.KRCWM:$mask, _.RC:$src), 3540 OpcodeStr # "\t{$src, ${dst} {${mask}}|"# 3541 "${dst} {${mask}}, $src}", 3542 [], _.ExeDomain>, EVEX, EVEX_K, 3543 FoldGenData<BaseName#_.ZSuffix#rrk>, 3544 Sched<[Sched.RR]>; 3545 def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), 3546 (ins _.KRCWM:$mask, _.RC:$src), 3547 OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" # 3548 "${dst} {${mask}} {z}, $src}", 3549 [], _.ExeDomain>, EVEX, EVEX_KZ, 3550 FoldGenData<BaseName#_.ZSuffix#rrkz>, 3551 Sched<[Sched.RR]>; 3552 } 3553 3554 let hasSideEffects = 0, mayStore = 1 in 3555 def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src), 3556 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 3557 !if(NoMRPattern, [], 3558 [(st_frag (_.VT _.RC:$src), addr:$dst)]), 3559 _.ExeDomain>, EVEX, Sched<[Sched.MR]>, 3560 EVEX2VEXOverride<EVEX2VEXOvrd#"mr">; 3561 def mrk : AVX512PI<opc, MRMDestMem, (outs), 3562 (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src), 3563 OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}", 3564 [], _.ExeDomain>, EVEX, EVEX_K, Sched<[Sched.MR]>, 3565 NotMemoryFoldable; 3566 3567 def: Pat<(mstore (_.VT _.RC:$src), addr:$ptr, _.KRCWM:$mask), 3568 (!cast<Instruction>(BaseName#_.ZSuffix#mrk) addr:$ptr, 3569 _.KRCWM:$mask, _.RC:$src)>; 3570 3571 def : InstAlias<OpcodeStr#".s\t{$src, $dst|$dst, $src}", 3572 (!cast<Instruction>(BaseName#_.ZSuffix#"rr_REV") 3573 _.RC:$dst, _.RC:$src), 0>; 3574 def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}", 3575 (!cast<Instruction>(BaseName#_.ZSuffix#"rrk_REV") 3576 
_.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>; 3577 def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}", 3578 (!cast<Instruction>(BaseName#_.ZSuffix#"rrkz_REV") 3579 _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>; 3580} 3581 3582multiclass avx512_store_vl< bits<8> opc, string OpcodeStr, 3583 AVX512VLVectorVTInfo _, Predicate prd, 3584 X86SchedWriteMoveLSWidths Sched, 3585 string EVEX2VEXOvrd, bit NoMRPattern = 0> { 3586 let Predicates = [prd] in 3587 defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, store, 3588 masked_store, Sched.ZMM, "", 3589 NoMRPattern>, EVEX_V512; 3590 let Predicates = [prd, HasVLX] in { 3591 defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, store, 3592 masked_store, Sched.YMM, 3593 EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256; 3594 defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, store, 3595 masked_store, Sched.XMM, EVEX2VEXOvrd, 3596 NoMRPattern>, EVEX_V128; 3597 } 3598} 3599 3600multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr, 3601 AVX512VLVectorVTInfo _, Predicate prd, 3602 X86SchedWriteMoveLSWidths Sched, 3603 string EVEX2VEXOvrd, bit NoMRPattern = 0> { 3604 let Predicates = [prd] in 3605 defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, alignedstore, 3606 masked_store_aligned, Sched.ZMM, "", 3607 NoMRPattern>, EVEX_V512; 3608 3609 let Predicates = [prd, HasVLX] in { 3610 defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, alignedstore, 3611 masked_store_aligned, Sched.YMM, 3612 EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256; 3613 defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, alignedstore, 3614 masked_store_aligned, Sched.XMM, EVEX2VEXOvrd, 3615 NoMRPattern>, EVEX_V128; 3616 } 3617} 3618 3619defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info, 3620 HasAVX512, SchedWriteFMoveLS, "VMOVAPS">, 3621 avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info, 3622 HasAVX512, SchedWriteFMoveLS, "VMOVAPS">, 3623 PS, EVEX_CD8<32, CD8VF>; 3624 3625defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info, 3626 HasAVX512, SchedWriteFMoveLS, "VMOVAPD">, 3627 avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info, 3628 HasAVX512, SchedWriteFMoveLS, "VMOVAPD">, 3629 PD, VEX_W, EVEX_CD8<64, CD8VF>; 3630 3631defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512, 3632 SchedWriteFMoveLS, "VMOVUPS", 0, null_frag>, 3633 avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512, 3634 SchedWriteFMoveLS, "VMOVUPS">, 3635 PS, EVEX_CD8<32, CD8VF>; 3636 3637defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512, 3638 SchedWriteFMoveLS, "VMOVUPD", 0, null_frag>, 3639 avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512, 3640 SchedWriteFMoveLS, "VMOVUPD">, 3641 PD, VEX_W, EVEX_CD8<64, CD8VF>; 3642 3643defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info, 3644 HasAVX512, SchedWriteVecMoveLS, 3645 "VMOVDQA", 1>, 3646 avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info, 3647 HasAVX512, SchedWriteVecMoveLS, 3648 "VMOVDQA", 1>, 3649 PD, EVEX_CD8<32, CD8VF>; 3650 3651defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info, 3652 HasAVX512, SchedWriteVecMoveLS, 3653 "VMOVDQA">, 3654 avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info, 3655 HasAVX512, SchedWriteVecMoveLS, 3656 "VMOVDQA">, 3657 PD, VEX_W, EVEX_CD8<64, CD8VF>; 3658 3659defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI, 3660 SchedWriteVecMoveLS, "VMOVDQU", 1>, 3661 avx512_store_vl<0x7F, 
"vmovdqu8", avx512vl_i8_info, HasBWI, 3662 SchedWriteVecMoveLS, "VMOVDQU", 1>, 3663 XD, EVEX_CD8<8, CD8VF>; 3664 3665defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI, 3666 SchedWriteVecMoveLS, "VMOVDQU", 1>, 3667 avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, HasBWI, 3668 SchedWriteVecMoveLS, "VMOVDQU", 1>, 3669 XD, VEX_W, EVEX_CD8<16, CD8VF>; 3670 3671defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512, 3672 SchedWriteVecMoveLS, "VMOVDQU", 1, null_frag>, 3673 avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, HasAVX512, 3674 SchedWriteVecMoveLS, "VMOVDQU", 1>, 3675 XS, EVEX_CD8<32, CD8VF>; 3676 3677defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512, 3678 SchedWriteVecMoveLS, "VMOVDQU", 0, null_frag>, 3679 avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, HasAVX512, 3680 SchedWriteVecMoveLS, "VMOVDQU">, 3681 XS, VEX_W, EVEX_CD8<64, CD8VF>; 3682 3683// Special instructions to help with spilling when we don't have VLX. We need 3684// to load or store from a ZMM register instead. These are converted in 3685// expandPostRAPseudos. 3686let isReMaterializable = 1, canFoldAsLoad = 1, 3687 isPseudo = 1, mayLoad = 1, hasSideEffects = 0 in { 3688def VMOVAPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src), 3689 "", []>, Sched<[WriteFLoadX]>; 3690def VMOVAPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src), 3691 "", []>, Sched<[WriteFLoadY]>; 3692def VMOVUPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src), 3693 "", []>, Sched<[WriteFLoadX]>; 3694def VMOVUPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src), 3695 "", []>, Sched<[WriteFLoadY]>; 3696} 3697 3698let isPseudo = 1, mayStore = 1, hasSideEffects = 0 in { 3699def VMOVAPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src), 3700 "", []>, Sched<[WriteFStoreX]>; 3701def VMOVAPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src), 3702 "", []>, Sched<[WriteFStoreY]>; 3703def VMOVUPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src), 3704 "", []>, Sched<[WriteFStoreX]>; 3705def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src), 3706 "", []>, Sched<[WriteFStoreY]>; 3707} 3708 3709def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 immAllZerosV), 3710 (v8i64 VR512:$src))), 3711 (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)), 3712 VK8), VR512:$src)>; 3713 3714def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV), 3715 (v16i32 VR512:$src))), 3716 (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>; 3717 3718// These patterns exist to prevent the above patterns from introducing a second 3719// mask inversion when one already exists. 
3720def : Pat<(v8i64 (vselect (v8i1 (vnot VK8:$mask)), 3721 (v8i64 immAllZerosV), 3722 (v8i64 VR512:$src))), 3723 (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>; 3724def : Pat<(v16i32 (vselect (v16i1 (vnot VK16:$mask)), 3725 (v16i32 immAllZerosV), 3726 (v16i32 VR512:$src))), 3727 (VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>; 3728 3729multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow, 3730 X86VectorVTInfo Wide> { 3731 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask), 3732 Narrow.RC:$src1, Narrow.RC:$src0)), 3733 (EXTRACT_SUBREG 3734 (Wide.VT 3735 (!cast<Instruction>(InstrStr#"rrk") 3736 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src0, Narrow.SubRegIdx)), 3737 (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM), 3738 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))), 3739 Narrow.SubRegIdx)>; 3740 3741 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask), 3742 Narrow.RC:$src1, Narrow.ImmAllZerosV)), 3743 (EXTRACT_SUBREG 3744 (Wide.VT 3745 (!cast<Instruction>(InstrStr#"rrkz") 3746 (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM), 3747 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))), 3748 Narrow.SubRegIdx)>; 3749} 3750 3751// Patterns for handling v8i1 selects of 256-bit vectors when VLX isn't 3752// available. Use a 512-bit operation and extract. 3753let Predicates = [HasAVX512, NoVLX] in { 3754 defm : mask_move_lowering<"VMOVAPSZ", v4f32x_info, v16f32_info>; 3755 defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>; 3756 defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>; 3757 defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>; 3758 3759 defm : mask_move_lowering<"VMOVAPDZ", v2f64x_info, v8f64_info>; 3760 defm : mask_move_lowering<"VMOVDQA64Z", v2i64x_info, v8i64_info>; 3761 defm : mask_move_lowering<"VMOVAPDZ", v4f64x_info, v8f64_info>; 3762 defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>; 3763} 3764 3765let Predicates = [HasBWI, NoVLX] in { 3766 defm : mask_move_lowering<"VMOVDQU8Z", v16i8x_info, v64i8_info>; 3767 defm : mask_move_lowering<"VMOVDQU8Z", v32i8x_info, v64i8_info>; 3768 3769 defm : mask_move_lowering<"VMOVDQU16Z", v8i16x_info, v32i16_info>; 3770 defm : mask_move_lowering<"VMOVDQU16Z", v16i16x_info, v32i16_info>; 3771 3772 defm : mask_move_lowering<"VMOVDQU16Z", v8f16x_info, v32f16_info>; 3773 defm : mask_move_lowering<"VMOVDQU16Z", v16f16x_info, v32f16_info>; 3774 3775 defm : mask_move_lowering<"VMOVDQU16Z", v8bf16x_info, v32bf16_info>; 3776 defm : mask_move_lowering<"VMOVDQU16Z", v16bf16x_info, v32bf16_info>; 3777} 3778 3779let Predicates = [HasAVX512] in { 3780 // 512-bit load. 3781 def : Pat<(alignedloadv16i32 addr:$src), 3782 (VMOVDQA64Zrm addr:$src)>; 3783 def : Pat<(alignedloadv32i16 addr:$src), 3784 (VMOVDQA64Zrm addr:$src)>; 3785 def : Pat<(alignedloadv32f16 addr:$src), 3786 (VMOVAPSZrm addr:$src)>; 3787 def : Pat<(alignedloadv32bf16 addr:$src), 3788 (VMOVAPSZrm addr:$src)>; 3789 def : Pat<(alignedloadv64i8 addr:$src), 3790 (VMOVDQA64Zrm addr:$src)>; 3791 def : Pat<(loadv16i32 addr:$src), 3792 (VMOVDQU64Zrm addr:$src)>; 3793 def : Pat<(loadv32i16 addr:$src), 3794 (VMOVDQU64Zrm addr:$src)>; 3795 def : Pat<(loadv32f16 addr:$src), 3796 (VMOVUPSZrm addr:$src)>; 3797 def : Pat<(loadv32bf16 addr:$src), 3798 (VMOVUPSZrm addr:$src)>; 3799 def : Pat<(loadv64i8 addr:$src), 3800 (VMOVDQU64Zrm addr:$src)>; 3801 3802 // 512-bit store. 
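  // (As with the loads above, element types without a dedicated full-width
  // opcode - i8/i16/f16/bf16 - borrow VMOVDQA64/VMOVDQU64 or VMOVAPS/VMOVUPS:
  // a whole-register move is element-size agnostic, so any 512-bit mov with
  // the right alignment semantics works.)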
3803 def : Pat<(alignedstore (v16i32 VR512:$src), addr:$dst), 3804 (VMOVDQA64Zmr addr:$dst, VR512:$src)>; 3805 def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst), 3806 (VMOVDQA64Zmr addr:$dst, VR512:$src)>; 3807 def : Pat<(alignedstore (v32f16 VR512:$src), addr:$dst), 3808 (VMOVAPSZmr addr:$dst, VR512:$src)>; 3809 def : Pat<(alignedstore (v32bf16 VR512:$src), addr:$dst), 3810 (VMOVAPSZmr addr:$dst, VR512:$src)>; 3811 def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst), 3812 (VMOVDQA64Zmr addr:$dst, VR512:$src)>; 3813 def : Pat<(store (v16i32 VR512:$src), addr:$dst), 3814 (VMOVDQU64Zmr addr:$dst, VR512:$src)>; 3815 def : Pat<(store (v32i16 VR512:$src), addr:$dst), 3816 (VMOVDQU64Zmr addr:$dst, VR512:$src)>; 3817 def : Pat<(store (v32f16 VR512:$src), addr:$dst), 3818 (VMOVUPSZmr addr:$dst, VR512:$src)>; 3819 def : Pat<(store (v32bf16 VR512:$src), addr:$dst), 3820 (VMOVUPSZmr addr:$dst, VR512:$src)>; 3821 def : Pat<(store (v64i8 VR512:$src), addr:$dst), 3822 (VMOVDQU64Zmr addr:$dst, VR512:$src)>; 3823} 3824 3825let Predicates = [HasVLX] in { 3826 // 128-bit load. 3827 def : Pat<(alignedloadv4i32 addr:$src), 3828 (VMOVDQA64Z128rm addr:$src)>; 3829 def : Pat<(alignedloadv8i16 addr:$src), 3830 (VMOVDQA64Z128rm addr:$src)>; 3831 def : Pat<(alignedloadv8f16 addr:$src), 3832 (VMOVAPSZ128rm addr:$src)>; 3833 def : Pat<(alignedloadv8bf16 addr:$src), 3834 (VMOVAPSZ128rm addr:$src)>; 3835 def : Pat<(alignedloadv16i8 addr:$src), 3836 (VMOVDQA64Z128rm addr:$src)>; 3837 def : Pat<(loadv4i32 addr:$src), 3838 (VMOVDQU64Z128rm addr:$src)>; 3839 def : Pat<(loadv8i16 addr:$src), 3840 (VMOVDQU64Z128rm addr:$src)>; 3841 def : Pat<(loadv8f16 addr:$src), 3842 (VMOVUPSZ128rm addr:$src)>; 3843 def : Pat<(loadv8bf16 addr:$src), 3844 (VMOVUPSZ128rm addr:$src)>; 3845 def : Pat<(loadv16i8 addr:$src), 3846 (VMOVDQU64Z128rm addr:$src)>; 3847 3848 // 128-bit store. 3849 def : Pat<(alignedstore (v4i32 VR128X:$src), addr:$dst), 3850 (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>; 3851 def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst), 3852 (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>; 3853 def : Pat<(alignedstore (v8f16 VR128X:$src), addr:$dst), 3854 (VMOVAPSZ128mr addr:$dst, VR128X:$src)>; 3855 def : Pat<(alignedstore (v8bf16 VR128X:$src), addr:$dst), 3856 (VMOVAPSZ128mr addr:$dst, VR128X:$src)>; 3857 def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst), 3858 (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>; 3859 def : Pat<(store (v4i32 VR128X:$src), addr:$dst), 3860 (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>; 3861 def : Pat<(store (v8i16 VR128X:$src), addr:$dst), 3862 (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>; 3863 def : Pat<(store (v8f16 VR128X:$src), addr:$dst), 3864 (VMOVUPSZ128mr addr:$dst, VR128X:$src)>; 3865 def : Pat<(store (v8bf16 VR128X:$src), addr:$dst), 3866 (VMOVUPSZ128mr addr:$dst, VR128X:$src)>; 3867 def : Pat<(store (v16i8 VR128X:$src), addr:$dst), 3868 (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>; 3869 3870 // 256-bit load. 
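  // (Same scheme as the 128-bit block above, one step wider: the Z256 forms
  // require VLX, and element types without their own opcode again reuse the
  // VMOVDQA64/VMOVAPS aligned and VMOVDQU64/VMOVUPS unaligned encodings.)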
3871 def : Pat<(alignedloadv8i32 addr:$src), 3872 (VMOVDQA64Z256rm addr:$src)>; 3873 def : Pat<(alignedloadv16i16 addr:$src), 3874 (VMOVDQA64Z256rm addr:$src)>; 3875 def : Pat<(alignedloadv16f16 addr:$src), 3876 (VMOVAPSZ256rm addr:$src)>; 3877 def : Pat<(alignedloadv16bf16 addr:$src), 3878 (VMOVAPSZ256rm addr:$src)>; 3879 def : Pat<(alignedloadv32i8 addr:$src), 3880 (VMOVDQA64Z256rm addr:$src)>; 3881 def : Pat<(loadv8i32 addr:$src), 3882 (VMOVDQU64Z256rm addr:$src)>; 3883 def : Pat<(loadv16i16 addr:$src), 3884 (VMOVDQU64Z256rm addr:$src)>; 3885 def : Pat<(loadv16f16 addr:$src), 3886 (VMOVUPSZ256rm addr:$src)>; 3887 def : Pat<(loadv16bf16 addr:$src), 3888 (VMOVUPSZ256rm addr:$src)>; 3889 def : Pat<(loadv32i8 addr:$src), 3890 (VMOVDQU64Z256rm addr:$src)>; 3891 3892 // 256-bit store. 3893 def : Pat<(alignedstore (v8i32 VR256X:$src), addr:$dst), 3894 (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>; 3895 def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst), 3896 (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>; 3897 def : Pat<(alignedstore (v16f16 VR256X:$src), addr:$dst), 3898 (VMOVAPSZ256mr addr:$dst, VR256X:$src)>; 3899 def : Pat<(alignedstore (v16bf16 VR256X:$src), addr:$dst), 3900 (VMOVAPSZ256mr addr:$dst, VR256X:$src)>; 3901 def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst), 3902 (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>; 3903 def : Pat<(store (v8i32 VR256X:$src), addr:$dst), 3904 (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>; 3905 def : Pat<(store (v16i16 VR256X:$src), addr:$dst), 3906 (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>; 3907 def : Pat<(store (v16f16 VR256X:$src), addr:$dst), 3908 (VMOVUPSZ256mr addr:$dst, VR256X:$src)>; 3909 def : Pat<(store (v16bf16 VR256X:$src), addr:$dst), 3910 (VMOVUPSZ256mr addr:$dst, VR256X:$src)>; 3911 def : Pat<(store (v32i8 VR256X:$src), addr:$dst), 3912 (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>; 3913} 3914 3915multiclass mask_move_lowering_f16_bf16<AVX512VLVectorVTInfo _> { 3916let Predicates = [HasBWI] in { 3917 def : Pat<(_.info512.VT (vselect VK32WM:$mask, (_.info512.VT VR512:$src1), (_.info512.VT VR512:$src0))), 3918 (VMOVDQU16Zrrk VR512:$src0, VK32WM:$mask, VR512:$src1)>; 3919 def : Pat<(_.info512.VT (vselect VK32WM:$mask, (_.info512.VT VR512:$src1), _.info512.ImmAllZerosV)), 3920 (VMOVDQU16Zrrkz VK32WM:$mask, VR512:$src1)>; 3921 def : Pat<(_.info512.VT (vselect VK32WM:$mask, 3922 (_.info512.VT (_.info512.AlignedLdFrag addr:$src)), (_.info512.VT VR512:$src0))), 3923 (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>; 3924 def : Pat<(_.info512.VT (vselect VK32WM:$mask, 3925 (_.info512.VT (_.info512.AlignedLdFrag addr:$src)), _.info512.ImmAllZerosV)), 3926 (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>; 3927 def : Pat<(_.info512.VT (vselect VK32WM:$mask, 3928 (_.info512.VT (_.info512.LdFrag addr:$src)), (_.info512.VT VR512:$src0))), 3929 (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>; 3930 def : Pat<(_.info512.VT (vselect VK32WM:$mask, 3931 (_.info512.VT (_.info512.LdFrag addr:$src)), _.info512.ImmAllZerosV)), 3932 (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>; 3933 def : Pat<(_.info512.VT (masked_load addr:$src, VK32WM:$mask, (_.info512.VT VR512:$src0))), 3934 (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>; 3935 def : Pat<(_.info512.VT (masked_load addr:$src, VK32WM:$mask, undef)), 3936 (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>; 3937 def : Pat<(_.info512.VT (masked_load addr:$src, VK32WM:$mask, _.info512.ImmAllZerosV)), 3938 (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>; 3939 3940 def : Pat<(masked_store (_.info512.VT VR512:$src), addr:$dst, 
multiclass mask_move_lowering_f16_bf16<AVX512VLVectorVTInfo _> {
let Predicates = [HasBWI] in {
  def : Pat<(_.info512.VT (vselect VK32WM:$mask, (_.info512.VT VR512:$src1), (_.info512.VT VR512:$src0))),
            (VMOVDQU16Zrrk VR512:$src0, VK32WM:$mask, VR512:$src1)>;
  def : Pat<(_.info512.VT (vselect VK32WM:$mask, (_.info512.VT VR512:$src1), _.info512.ImmAllZerosV)),
            (VMOVDQU16Zrrkz VK32WM:$mask, VR512:$src1)>;
  def : Pat<(_.info512.VT (vselect VK32WM:$mask,
                           (_.info512.VT (_.info512.AlignedLdFrag addr:$src)), (_.info512.VT VR512:$src0))),
            (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
  def : Pat<(_.info512.VT (vselect VK32WM:$mask,
                           (_.info512.VT (_.info512.AlignedLdFrag addr:$src)), _.info512.ImmAllZerosV)),
            (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
  def : Pat<(_.info512.VT (vselect VK32WM:$mask,
                           (_.info512.VT (_.info512.LdFrag addr:$src)), (_.info512.VT VR512:$src0))),
            (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
  def : Pat<(_.info512.VT (vselect VK32WM:$mask,
                           (_.info512.VT (_.info512.LdFrag addr:$src)), _.info512.ImmAllZerosV)),
            (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
  def : Pat<(_.info512.VT (masked_load addr:$src, VK32WM:$mask, (_.info512.VT VR512:$src0))),
            (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
  def : Pat<(_.info512.VT (masked_load addr:$src, VK32WM:$mask, undef)),
            (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
  def : Pat<(_.info512.VT (masked_load addr:$src, VK32WM:$mask, _.info512.ImmAllZerosV)),
            (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;

  def : Pat<(masked_store (_.info512.VT VR512:$src), addr:$dst, VK32WM:$mask),
            (VMOVDQU16Zmrk addr:$dst, VK32WM:$mask, VR512:$src)>;
}
let Predicates = [HasBWI, HasVLX] in {
  def : Pat<(_.info256.VT (vselect VK16WM:$mask, (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src0))),
            (VMOVDQU16Z256rrk VR256X:$src0, VK16WM:$mask, VR256X:$src1)>;
  def : Pat<(_.info256.VT (vselect VK16WM:$mask, (_.info256.VT VR256X:$src1), _.info256.ImmAllZerosV)),
            (VMOVDQU16Z256rrkz VK16WM:$mask, VR256X:$src1)>;
  def : Pat<(_.info256.VT (vselect VK16WM:$mask,
                           (_.info256.VT (_.info256.AlignedLdFrag addr:$src)), (_.info256.VT VR256X:$src0))),
            (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
  def : Pat<(_.info256.VT (vselect VK16WM:$mask,
                           (_.info256.VT (_.info256.AlignedLdFrag addr:$src)), _.info256.ImmAllZerosV)),
            (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
  def : Pat<(_.info256.VT (vselect VK16WM:$mask,
                           (_.info256.VT (_.info256.LdFrag addr:$src)), (_.info256.VT VR256X:$src0))),
            (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
  def : Pat<(_.info256.VT (vselect VK16WM:$mask,
                           (_.info256.VT (_.info256.LdFrag addr:$src)), _.info256.ImmAllZerosV)),
            (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
  def : Pat<(_.info256.VT (masked_load addr:$src, VK16WM:$mask, (_.info256.VT VR256X:$src0))),
            (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
  def : Pat<(_.info256.VT (masked_load addr:$src, VK16WM:$mask, undef)),
            (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
  def : Pat<(_.info256.VT (masked_load addr:$src, VK16WM:$mask, _.info256.ImmAllZerosV)),
            (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;

  def : Pat<(masked_store (_.info256.VT VR256X:$src), addr:$dst, VK16WM:$mask),
            (VMOVDQU16Z256mrk addr:$dst, VK16WM:$mask, VR256X:$src)>;

  def : Pat<(_.info128.VT (vselect VK8WM:$mask, (_.info128.VT VR128X:$src1), (_.info128.VT VR128X:$src0))),
            (VMOVDQU16Z128rrk VR128X:$src0, VK8WM:$mask, VR128X:$src1)>;
  def : Pat<(_.info128.VT (vselect VK8WM:$mask, (_.info128.VT VR128X:$src1), _.info128.ImmAllZerosV)),
            (VMOVDQU16Z128rrkz VK8WM:$mask, VR128X:$src1)>;
  def : Pat<(_.info128.VT (vselect VK8WM:$mask,
                           (_.info128.VT (_.info128.AlignedLdFrag addr:$src)), (_.info128.VT VR128X:$src0))),
            (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
  def : Pat<(_.info128.VT (vselect VK8WM:$mask,
                           (_.info128.VT (_.info128.AlignedLdFrag addr:$src)), _.info128.ImmAllZerosV)),
            (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
  def : Pat<(_.info128.VT (vselect VK8WM:$mask,
                           (_.info128.VT (_.info128.LdFrag addr:$src)), (_.info128.VT VR128X:$src0))),
            (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
  def : Pat<(_.info128.VT (vselect VK8WM:$mask,
                           (_.info128.VT (_.info128.LdFrag addr:$src)), _.info128.ImmAllZerosV)),
            (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
  def : Pat<(_.info128.VT (masked_load addr:$src, VK8WM:$mask, (_.info128.VT VR128X:$src0))),
            (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
  def : Pat<(_.info128.VT (masked_load addr:$src, VK8WM:$mask, undef)),
            (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
  def : Pat<(_.info128.VT (masked_load addr:$src, VK8WM:$mask, _.info128.ImmAllZerosV)),
            (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;

  def : Pat<(masked_store (_.info128.VT VR128X:$src), addr:$dst, VK8WM:$mask),
            (VMOVDQU16Z128mrk addr:$dst, VK8WM:$mask, VR128X:$src)>;
}
}
defm : mask_move_lowering_f16_bf16<avx512vl_f16_info>;
defm : mask_move_lowering_f16_bf16<avx512vl_bf16_info>;

// Move Int Doubleword to Packed Double Int
//
let ExeDomain = SSEPackedInt in {
def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
                             "vmovd\t{$src, $dst|$dst, $src}",
                             [(set VR128X:$dst,
                               (v4i32 (scalar_to_vector GR32:$src)))]>,
                             EVEX, Sched<[WriteVecMoveFromGpr]>;
def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
                             "vmovd\t{$src, $dst|$dst, $src}",
                             [(set VR128X:$dst,
                               (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
                             EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>;
def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
                              "vmovq\t{$src, $dst|$dst, $src}",
                              [(set VR128X:$dst,
                                (v2i64 (scalar_to_vector GR64:$src)))]>,
                              EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
                              (ins i64mem:$src),
                              "vmovq\t{$src, $dst|$dst, $src}", []>,
                              EVEX, VEX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecLoad]>;
let isCodeGenOnly = 1 in {
def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src),
                             "vmovq\t{$src, $dst|$dst, $src}",
                             [(set FR64X:$dst, (bitconvert GR64:$src))]>,
                             EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src),
                             "vmovq\t{$src, $dst|$dst, $src}",
                             [(set GR64:$dst, (bitconvert FR64X:$src))]>,
                             EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
}
} // ExeDomain = SSEPackedInt
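// A minimal C sketch of the GPR-to-XMM move implemented above (assumption:
// the intrinsic is standard SSE2; with AVX-512 enabled it is expected to
// select VMOVDI2PDIZrr):
//   #include <immintrin.h>
//   __m128i from_gpr(int x) { return _mm_cvtsi32_si128(x); }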
// Move Int Doubleword to Single Scalar
//
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
def VMOVDI2SSZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
                            "vmovd\t{$src, $dst|$dst, $src}",
                            [(set FR32X:$dst, (bitconvert GR32:$src))]>,
                            EVEX, Sched<[WriteVecMoveFromGpr]>;
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1

// Move doubleword from xmm register to r/m32
//
let ExeDomain = SSEPackedInt in {
def VMOVPDI2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
                             "vmovd\t{$src, $dst|$dst, $src}",
                             [(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
                                                          (iPTR 0)))]>,
                             EVEX, Sched<[WriteVecMoveToGpr]>;
def VMOVPDI2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs),
                             (ins i32mem:$dst, VR128X:$src),
                             "vmovd\t{$src, $dst|$dst, $src}",
                             [(store (i32 (extractelt (v4i32 VR128X:$src),
                                                      (iPTR 0))), addr:$dst)]>,
                             EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>;
} // ExeDomain = SSEPackedInt

// Move quadword from xmm1 register to r/m64
//
let ExeDomain = SSEPackedInt in {
def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
                       "vmovq\t{$src, $dst|$dst, $src}",
                       [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
                                                    (iPTR 0)))]>,
                       PD, EVEX, VEX_W, Sched<[WriteVecMoveToGpr]>,
                       Requires<[HasAVX512]>;

let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src),
                       "vmovq\t{$src, $dst|$dst, $src}", []>, PD,
                       EVEX, VEX_W, Sched<[WriteVecStore]>,
                       Requires<[HasAVX512, In64BitMode]>;

def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
                      (ins i64mem:$dst, VR128X:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
                              addr:$dst)]>,
                      EVEX, PD, VEX_W, EVEX_CD8<64, CD8VT1>,
                      Sched<[WriteVecStore]>, Requires<[HasAVX512]>;

let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
                             (ins VR128X:$src),
                             "vmovq\t{$src, $dst|$dst, $src}", []>,
                             EVEX, VEX_W, Sched<[SchedWriteVecLogic.XMM]>;
} // ExeDomain = SSEPackedInt

def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}",
                (VMOVPQI2QIZrr VR128X:$dst, VR128X:$src), 0>;

let Predicates = [HasAVX512] in {
  def : Pat<(X86vextractstore64 (v2i64 VR128X:$src), addr:$dst),
            (VMOVPQI2QIZmr addr:$dst, VR128X:$src)>;
}

// Move Scalar Single to Double Int
//
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
def VMOVSS2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
                            (ins FR32X:$src),
                            "vmovd\t{$src, $dst|$dst, $src}",
                            [(set GR32:$dst, (bitconvert FR32X:$src))]>,
                            EVEX, Sched<[WriteVecMoveToGpr]>;
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1

// Move Quadword Int to Packed Quadword Int
//
let ExeDomain = SSEPackedInt in {
def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
                              (ins i64mem:$src),
                              "vmovq\t{$src, $dst|$dst, $src}",
                              [(set VR128X:$dst,
                                (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
                              EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>;
} // ExeDomain = SSEPackedInt

// Allow "vmovd" but print "vmovq".
def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
                (VMOV64toPQIZrr VR128X:$dst, GR64:$src), 0>;
def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
                (VMOVPQIto64Zrr GR64:$dst, VR128X:$src), 0>;
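// A minimal C sketch of the zero-extending quadword load above (assumption:
// standard SSE2 intrinsic; expected to match the loadi64 scalar_to_vector
// pattern and select VMOVQI2PQIZrm):
//   #include <immintrin.h>
//   __m128i load_q(const void *p) { return _mm_loadl_epi64((const __m128i *)p); }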
// Conversions between masks and scalar fp.
def : Pat<(v32i1 (bitconvert FR32X:$src)),
          (KMOVDkr (VMOVSS2DIZrr FR32X:$src))>;
def : Pat<(f32 (bitconvert VK32:$src)),
          (VMOVDI2SSZrr (KMOVDrk VK32:$src))>;

def : Pat<(v64i1 (bitconvert FR64X:$src)),
          (KMOVQkr (VMOVSDto64Zrr FR64X:$src))>;
def : Pat<(f64 (bitconvert VK64:$src)),
          (VMOV64toSDZrr (KMOVQrk VK64:$src))>;

//===----------------------------------------------------------------------===//
// AVX-512  MOVSH, MOVSS, MOVSD
//===----------------------------------------------------------------------===//

multiclass avx512_move_scalar<string asm, SDNode OpNode, PatFrag vzload_frag,
                              X86VectorVTInfo _, Predicate prd = HasAVX512> {
  let Predicates = !if (!eq (prd, HasFP16), [HasFP16], [prd, OptForSize]) in
  def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.RC:$src2),
                    !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2)))],
                    _.ExeDomain>, EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
  let Predicates = [prd] in {
  def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
                      (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
                      !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|",
                                 "$dst {${mask}} {z}, $src1, $src2}"),
                      [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
                                              (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                                              _.ImmAllZerosV)))],
                      _.ExeDomain>, EVEX_4V, EVEX_KZ, Sched<[SchedWriteFShuffle.XMM]>;
  let Constraints = "$src0 = $dst" in
  def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
                     (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
                     !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|",
                                "$dst {${mask}}, $src1, $src2}"),
                     [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
                                             (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                                             (_.VT _.RC:$src0))))],
                     _.ExeDomain>, EVEX_4V, EVEX_K, Sched<[SchedWriteFShuffle.XMM]>;
  let canFoldAsLoad = 1, isReMaterializable = 1 in {
  def rm : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst), (ins _.ScalarMemOp:$src),
                    !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
                    [(set _.RC:$dst, (_.VT (vzload_frag addr:$src)))],
                    _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
  // _alt version uses FR32/FR64 register class.
  let isCodeGenOnly = 1 in
  def rm_alt : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
                        !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
                        [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
                        _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
  }
  let mayLoad = 1, hasSideEffects = 0 in {
  let Constraints = "$src0 = $dst" in
  def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
                     (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src),
                     !strconcat(asm, "\t{$src, $dst {${mask}}|",
                                "$dst {${mask}}, $src}"),
                     [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFLoad]>;
  def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
                      (ins _.KRCWM:$mask, _.ScalarMemOp:$src),
                      !strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
                                 "$dst {${mask}} {z}, $src}"),
                      [], _.ExeDomain>, EVEX, EVEX_KZ, Sched<[WriteFLoad]>;
  }
  def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src),
                   !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
                   [(store _.FRC:$src, addr:$dst)], _.ExeDomain>,
                   EVEX, Sched<[WriteFStore]>;
  let mayStore = 1, hasSideEffects = 0 in
  def mrk: AVX512PI<0x11, MRMDestMem, (outs),
                    (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.RC:$src),
                    !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
                    [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFStore]>,
                    NotMemoryFoldable;
  }
}

defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, X86vzload32, f32x_info>,
               VEX_LIG, XS, EVEX_CD8<32, CD8VT1>;

defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, X86vzload64, f64x_info>,
               VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>;

defm VMOVSHZ : avx512_move_scalar<"vmovsh", X86Movsh, X86vzload16, f16x_info,
                                  HasFP16>,
               VEX_LIG, T_MAP5XS, EVEX_CD8<16, CD8VT1>;

multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
                                       PatLeaf ZeroFP, X86VectorVTInfo _> {

def : Pat<(_.VT (OpNode _.RC:$src0,
                 (_.VT (scalar_to_vector
                        (_.EltVT (X86selects VK1WM:$mask,
                                             (_.EltVT _.FRC:$src1),
                                             (_.EltVT _.FRC:$src2))))))),
          (!cast<Instruction>(InstrStr#rrk)
           (_.VT (COPY_TO_REGCLASS _.FRC:$src2, _.RC)),
           VK1WM:$mask,
           (_.VT _.RC:$src0),
           (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;

def : Pat<(_.VT (OpNode _.RC:$src0,
                 (_.VT (scalar_to_vector
                        (_.EltVT (X86selects VK1WM:$mask,
                                             (_.EltVT _.FRC:$src1),
                                             (_.EltVT ZeroFP))))))),
          (!cast<Instruction>(InstrStr#rrkz)
           VK1WM:$mask,
           (_.VT _.RC:$src0),
           (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
}

multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
                                        dag Mask, RegisterClass MaskRC> {

def : Pat<(masked_store
           (_.info512.VT (insert_subvector undef,
                          (_.info128.VT _.info128.RC:$src),
                          (iPTR 0))), addr:$dst, Mask),
          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
           (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
           _.info128.RC:$src)>;

}

multiclass avx512_store_scalar_lowering_subreg<string InstrStr,
                                               AVX512VLVectorVTInfo _,
                                               dag Mask, RegisterClass MaskRC,
                                               SubRegIndex subreg> {

def : Pat<(masked_store
           (_.info512.VT (insert_subvector undef,
                          (_.info128.VT _.info128.RC:$src),
                          (iPTR 0))), addr:$dst, Mask),
          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
           (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
           _.info128.RC:$src)>;

}
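// A minimal C sketch of a masked scalar store that the store-lowering helpers
// above and below are meant to catch (assumption: AVX512F intrinsic; expected
// to select VMOVSSZmrk):
//   #include <immintrin.h>
//   void store_ss(float *p, __mmask8 k, __m128 v) { _mm_mask_store_ss(p, k, v); }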
// This matches the more recent codegen from clang that avoids emitting a
// 512-bit masked store directly. Codegen will widen a 128-bit masked store
// to 512 bits on AVX512F-only targets.
multiclass avx512_store_scalar_lowering_subreg2<string InstrStr,
                                                AVX512VLVectorVTInfo _,
                                                dag Mask512, dag Mask128,
                                                RegisterClass MaskRC,
                                                SubRegIndex subreg> {

// AVX512F pattern.
def : Pat<(masked_store
           (_.info512.VT (insert_subvector undef,
                          (_.info128.VT _.info128.RC:$src),
                          (iPTR 0))), addr:$dst, Mask512),
          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
           (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
           _.info128.RC:$src)>;

// AVX512VL pattern.
def : Pat<(masked_store (_.info128.VT _.info128.RC:$src), addr:$dst, Mask128),
          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
           (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
           _.info128.RC:$src)>;
}

multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
                                       dag Mask, RegisterClass MaskRC> {

def : Pat<(_.info128.VT (extract_subvector
                         (_.info512.VT (masked_load addr:$srcAddr, Mask,
                                        _.info512.ImmAllZerosV)),
                         (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmkz)
           (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
           addr:$srcAddr)>;

def : Pat<(_.info128.VT (extract_subvector
                         (_.info512.VT (masked_load addr:$srcAddr, Mask,
                                        (_.info512.VT (insert_subvector undef,
                                         (_.info128.VT (X86vzmovl _.info128.RC:$src)),
                                         (iPTR 0))))),
                         (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
           (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
           addr:$srcAddr)>;

}

multiclass avx512_load_scalar_lowering_subreg<string InstrStr,
                                              AVX512VLVectorVTInfo _,
                                              dag Mask, RegisterClass MaskRC,
                                              SubRegIndex subreg> {

def : Pat<(_.info128.VT (extract_subvector
                         (_.info512.VT (masked_load addr:$srcAddr, Mask,
                                        _.info512.ImmAllZerosV)),
                         (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmkz)
           (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
           addr:$srcAddr)>;

def : Pat<(_.info128.VT (extract_subvector
                         (_.info512.VT (masked_load addr:$srcAddr, Mask,
                                        (_.info512.VT (insert_subvector undef,
                                         (_.info128.VT (X86vzmovl _.info128.RC:$src)),
                                         (iPTR 0))))),
                         (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
           (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
           addr:$srcAddr)>;

}
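// A minimal C sketch of a zero-masked scalar load handled by the load-lowering
// helpers above (assumption: AVX512F intrinsic; expected to select
// VMOVSSZrmkz):
//   #include <immintrin.h>
//   __m128 load_ss_z(__mmask8 k, const float *p) { return _mm_maskz_load_ss(k, p); }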
// This matches the more recent codegen from clang that avoids emitting a
// 512-bit masked load directly. Codegen will widen a 128-bit masked load
// to 512 bits on AVX512F-only targets.
multiclass avx512_load_scalar_lowering_subreg2<string InstrStr,
                                               AVX512VLVectorVTInfo _,
                                               dag Mask512, dag Mask128,
                                               RegisterClass MaskRC,
                                               SubRegIndex subreg> {
// AVX512F patterns.
def : Pat<(_.info128.VT (extract_subvector
                         (_.info512.VT (masked_load addr:$srcAddr, Mask512,
                                        _.info512.ImmAllZerosV)),
                         (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmkz)
           (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
           addr:$srcAddr)>;

def : Pat<(_.info128.VT (extract_subvector
                         (_.info512.VT (masked_load addr:$srcAddr, Mask512,
                                        (_.info512.VT (insert_subvector undef,
                                         (_.info128.VT (X86vzmovl _.info128.RC:$src)),
                                         (iPTR 0))))),
                         (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
           (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
           addr:$srcAddr)>;

// AVX512VL patterns.
def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
                         _.info128.ImmAllZerosV)),
          (!cast<Instruction>(InstrStr#rmkz)
           (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
           addr:$srcAddr)>;

def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
                         (_.info128.VT (X86vzmovl _.info128.RC:$src)))),
          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
           (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
           addr:$srcAddr)>;
}

defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;

defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;

let Predicates = [HasFP16] in {
defm : avx512_move_scalar_lowering<"VMOVSHZ", X86Movsh, fp16imm0, v8f16x_info>;
defm : avx512_store_scalar_lowering<"VMOVSHZ", avx512vl_f16_info,
                   (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32>;
defm : avx512_store_scalar_lowering_subreg<"VMOVSHZ", avx512vl_f16_info,
                   (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32, sub_32bit>;
defm : avx512_store_scalar_lowering_subreg2<"VMOVSHZ", avx512vl_f16_info,
                   (v32i1 (insert_subvector
                           (v32i1 immAllZerosV),
                           (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                           (iPTR 0))),
                   (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                   GR8, sub_8bit>;

defm : avx512_load_scalar_lowering<"VMOVSHZ", avx512vl_f16_info,
                   (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32>;
defm : avx512_load_scalar_lowering_subreg<"VMOVSHZ", avx512vl_f16_info,
                   (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32, sub_32bit>;
defm : avx512_load_scalar_lowering_subreg2<"VMOVSHZ", avx512vl_f16_info,
                   (v32i1 (insert_subvector
                           (v32i1 immAllZerosV),
                           (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                           (iPTR 0))),
                   (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                   GR8, sub_8bit>;
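// A minimal C sketch of the masked scalar f16 select lowered by the patterns
// below (assumption: the AVX512FP16 intrinsic _mm_mask_move_sh is available
// in this form; expected to select VMOVSHZrrk):
//   #include <immintrin.h>
//   __m128h sel_sh(__m128h src, __mmask8 k, __m128h a, __m128h b) {
//     return _mm_mask_move_sh(src, k, a, b);
//   }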
def : Pat<(f16 (X86selects VK1WM:$mask, (f16 FR16X:$src1), (f16 FR16X:$src2))),
          (COPY_TO_REGCLASS (v8f16 (VMOVSHZrrk
                                    (v8f16 (COPY_TO_REGCLASS FR16X:$src2, VR128X)),
                                    VK1WM:$mask, (v8f16 (IMPLICIT_DEF)),
                                    (v8f16 (COPY_TO_REGCLASS FR16X:$src1, VR128X)))), FR16X)>;

def : Pat<(f16 (X86selects VK1WM:$mask, (f16 FR16X:$src1), fp16imm0)),
          (COPY_TO_REGCLASS (v8f16 (VMOVSHZrrkz VK1WM:$mask, (v8f16 (IMPLICIT_DEF)),
                                    (v8f16 (COPY_TO_REGCLASS FR16X:$src1, VR128X)))), FR16X)>;
}

defm : avx512_store_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (insert_subvector
                           (v16i1 immAllZerosV),
                           (v4i1 (extract_subvector
                                  (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                                  (iPTR 0))),
                           (iPTR 0))),
                   (v4i1 (extract_subvector
                          (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                          (iPTR 0))), GR8, sub_8bit>;
defm : avx512_store_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1
                    (extract_subvector
                     (v16i1
                      (insert_subvector
                       (v16i1 immAllZerosV),
                       (v2i1 (extract_subvector
                              (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                              (iPTR 0))),
                       (iPTR 0))),
                     (iPTR 0))),
                   (v2i1 (extract_subvector
                          (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                          (iPTR 0))), GR8, sub_8bit>;

defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;

defm : avx512_load_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (insert_subvector
                           (v16i1 immAllZerosV),
                           (v4i1 (extract_subvector
                                  (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                                  (iPTR 0))),
                           (iPTR 0))),
                   (v4i1 (extract_subvector
                          (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                          (iPTR 0))), GR8, sub_8bit>;
defm : avx512_load_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1
                    (extract_subvector
                     (v16i1
                      (insert_subvector
                       (v16i1 immAllZerosV),
                       (v2i1 (extract_subvector
                              (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                              (iPTR 0))),
                       (iPTR 0))),
                     (iPTR 0))),
                   (v2i1 (extract_subvector
                          (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                          (iPTR 0))), GR8, sub_8bit>;

def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrk
                                    (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
                                    VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
                                    (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;

def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), fp32imm0)),
          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrkz VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
                                    (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;

def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), (f32 FR32X:$src0))),
          (COPY_TO_REGCLASS
           (v4f32 (VMOVSSZrmk (v4f32 (COPY_TO_REGCLASS FR32X:$src0, VR128X)),
                              VK1WM:$mask, addr:$src)),
           FR32X)>;
def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), fp32imm0)),
          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrmkz VK1WM:$mask, addr:$src)), FR32X)>;
def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrk
                                    (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
                                    VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
                                    (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;

def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fp64imm0)),
          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrkz VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
                                    (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;

def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), (f64 FR64X:$src0))),
          (COPY_TO_REGCLASS
           (v2f64 (VMOVSDZrmk (v2f64 (COPY_TO_REGCLASS FR64X:$src0, VR128X)),
                              VK1WM:$mask, addr:$src)),
           FR64X)>;
def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), fp64imm0)),
          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrmkz VK1WM:$mask, addr:$src)), FR64X)>;


def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 VR128X:$src2))),
          (VMOVSSZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 VR128X:$src2))),
          (VMOVSDZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;

def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 immAllZerosV))),
          (VMOVSSZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 immAllZerosV))),
          (VMOVSDZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;

let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
  let Predicates = [HasFP16] in {
  def VMOVSHZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                            (ins VR128X:$src1, VR128X:$src2),
                            "vmovsh\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                            []>, T_MAP5XS, EVEX_4V, VEX_LIG,
                            FoldGenData<"VMOVSHZrr">,
                            Sched<[SchedWriteFShuffle.XMM]>;

  let Constraints = "$src0 = $dst" in
  def VMOVSHZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                             (ins f16x_info.RC:$src0, f16x_info.KRCWM:$mask,
                              VR128X:$src1, VR128X:$src2),
                             "vmovsh\t{$src2, $src1, $dst {${mask}}|"#
                             "$dst {${mask}}, $src1, $src2}",
                             []>, T_MAP5XS, EVEX_K, EVEX_4V, VEX_LIG,
                             FoldGenData<"VMOVSHZrrk">,
                             Sched<[SchedWriteFShuffle.XMM]>;

  def VMOVSHZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                              (ins f16x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
                              "vmovsh\t{$src2, $src1, $dst {${mask}} {z}|"#
                              "$dst {${mask}} {z}, $src1, $src2}",
                              []>, EVEX_KZ, T_MAP5XS, EVEX_4V, VEX_LIG,
                              FoldGenData<"VMOVSHZrrkz">,
                              Sched<[SchedWriteFShuffle.XMM]>;
  }
  def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                            (ins VR128X:$src1, VR128X:$src2),
                            "vmovss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                            []>, XS, EVEX_4V, VEX_LIG,
                            FoldGenData<"VMOVSSZrr">,
                            Sched<[SchedWriteFShuffle.XMM]>;

  let Constraints = "$src0 = $dst" in
  def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                             (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask,
                              VR128X:$src1, VR128X:$src2),
                             "vmovss\t{$src2, $src1, $dst {${mask}}|"#
                             "$dst {${mask}}, $src1, $src2}",
                             []>, EVEX_K, XS, EVEX_4V, VEX_LIG,
                             FoldGenData<"VMOVSSZrrk">,
                             Sched<[SchedWriteFShuffle.XMM]>;

  def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                              (ins f32x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
                              "vmovss\t{$src2, $src1, $dst {${mask}} {z}|"#
                              "$dst {${mask}} {z}, $src1, $src2}",
                              []>, EVEX_KZ, XS, EVEX_4V, VEX_LIG,
                              FoldGenData<"VMOVSSZrrkz">,
                              Sched<[SchedWriteFShuffle.XMM]>;

  def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                            (ins VR128X:$src1, VR128X:$src2),
                            "vmovsd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                            []>, XD, EVEX_4V, VEX_LIG, VEX_W,
                            FoldGenData<"VMOVSDZrr">,
                            Sched<[SchedWriteFShuffle.XMM]>;
  let Constraints = "$src0 = $dst" in
  def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                             (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask,
                              VR128X:$src1, VR128X:$src2),
                             "vmovsd\t{$src2, $src1, $dst {${mask}}|"#
                             "$dst {${mask}}, $src1, $src2}",
                             []>, EVEX_K, XD, EVEX_4V, VEX_LIG,
                             VEX_W, FoldGenData<"VMOVSDZrrk">,
                             Sched<[SchedWriteFShuffle.XMM]>;

  def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                              (ins f64x_info.KRCWM:$mask, VR128X:$src1,
                               VR128X:$src2),
                              "vmovsd\t{$src2, $src1, $dst {${mask}} {z}|"#
                              "$dst {${mask}} {z}, $src1, $src2}",
                              []>, EVEX_KZ, XD, EVEX_4V, VEX_LIG,
                              VEX_W, FoldGenData<"VMOVSDZrrkz">,
                              Sched<[SchedWriteFShuffle.XMM]>;
}

def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VMOVSHZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst {${mask}}|"#
                "$dst {${mask}}, $src1, $src2}",
                (VMOVSHZrrk_REV VR128X:$dst, VK1WM:$mask,
                 VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst {${mask}} {z}|"#
                "$dst {${mask}} {z}, $src1, $src2}",
                (VMOVSHZrrkz_REV VR128X:$dst, VK1WM:$mask,
                 VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VMOVSSZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}}|"#
                "$dst {${mask}}, $src1, $src2}",
                (VMOVSSZrrk_REV VR128X:$dst, VK1WM:$mask,
                 VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"#
                "$dst {${mask}} {z}, $src1, $src2}",
                (VMOVSSZrrkz_REV VR128X:$dst, VK1WM:$mask,
                 VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VMOVSDZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}}|"#
                "$dst {${mask}}, $src1, $src2}",
                (VMOVSDZrrk_REV VR128X:$dst, VK1WM:$mask,
                 VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"#
                "$dst {${mask}} {z}, $src1, $src2}",
                (VMOVSDZrrkz_REV VR128X:$dst, VK1WM:$mask,
                 VR128X:$src1, VR128X:$src2), 0>;

let Predicates = [HasAVX512, OptForSize] in {
  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
            (VMOVSSZrr (v4f32 (AVX512_128_SET0)), VR128X:$src)>;
  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
            (VMOVSSZrr (v4i32 (AVX512_128_SET0)), VR128X:$src)>;
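  // A minimal C sketch of the "move scalar and zero the rest" idiom matched
  // by X86vzmovl here (assumption: clang canonicalizes this into a vzmovl
  // node; which instruction is then picked depends on OptForSize/OptForSpeed):
  //   #include <immintrin.h>
  //   __m128 clear_high(__m128 v) { return _mm_move_ss(_mm_setzero_ps(), v); }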
  // Move low f32 and clear high bits.
  def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
              (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))), sub_xmm)>;
  def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
              (v4i32 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)))), sub_xmm)>;

  def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
              (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))), sub_xmm)>;
  def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
              (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)))), sub_xmm)>;
}

// Use 128-bit blends for OptForSpeed since BLENDs have better throughput than
// VMOVSS/SD. Unfortunately, this loses the ability to use XMM16-31.
let Predicates = [HasAVX512, OptForSpeed] in {
  def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VBLENDPSrri (v4f32 (V_SET0)),
                    (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)),
                    (i8 1))), sub_xmm)>;
  def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VPBLENDWrri (v4i32 (V_SET0)),
                    (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)),
                    (i8 3))), sub_xmm)>;
}

let Predicates = [HasAVX512] in {
  def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
            (VMOVSSZrm addr:$src)>;
  def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
            (VMOVSDZrm addr:$src)>;

  // Represent the same patterns above but in the form they appear for
  // 256-bit types
  def : Pat<(v8f32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v4f64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;

  // Represent the same patterns above but in the form they appear for
  // 512-bit types
  def : Pat<(v16f32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v8f64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
}
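// A minimal C sketch of a zero-extended scalar load into a wider vector
// (assumption: _mm256_zextps128_ps256 is available, as in recent clang/gcc;
// expected to match the v8f32 X86vzload32 pattern above):
//   #include <immintrin.h>
//   __m256 load_ss_256(const float *p) {
//     return _mm256_zextps128_ps256(_mm_load_ss(p));
//   }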
let Predicates = [HasFP16] in {
  def : Pat<(v8f16 (X86vzmovl (v8f16 VR128X:$src))),
            (VMOVSHZrr (v8f16 (AVX512_128_SET0)), VR128X:$src)>;
  def : Pat<(v8i16 (X86vzmovl (v8i16 VR128X:$src))),
            (VMOVSHZrr (v8i16 (AVX512_128_SET0)), VR128X:$src)>;

  // FIXME we need better canonicalization in dag combine
  def : Pat<(v16f16 (X86vzmovl (v16f16 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v8f16 (VMOVSHZrr (v8f16 (AVX512_128_SET0)),
              (v8f16 (EXTRACT_SUBREG (v16f16 VR256X:$src), sub_xmm)))), sub_xmm)>;
  def : Pat<(v16i16 (X86vzmovl (v16i16 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v8i16 (VMOVSHZrr (v8i16 (AVX512_128_SET0)),
              (v8i16 (EXTRACT_SUBREG (v16i16 VR256X:$src), sub_xmm)))), sub_xmm)>;

  // FIXME we need better canonicalization in dag combine
  def : Pat<(v32f16 (X86vzmovl (v32f16 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v8f16 (VMOVSHZrr (v8f16 (AVX512_128_SET0)),
              (v8f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_xmm)))), sub_xmm)>;
  def : Pat<(v32i16 (X86vzmovl (v32i16 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v8i16 (VMOVSHZrr (v8i16 (AVX512_128_SET0)),
              (v8i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_xmm)))), sub_xmm)>;

  def : Pat<(v8f16 (X86vzload16 addr:$src)),
            (VMOVSHZrm addr:$src)>;

  def : Pat<(v16f16 (X86vzload16 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSHZrm addr:$src), sub_xmm)>;

  def : Pat<(v32f16 (X86vzload16 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSHZrm addr:$src), sub_xmm)>;
}

let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
                                  (ins VR128X:$src),
                                  "vmovq\t{$src, $dst|$dst, $src}",
                                  [(set VR128X:$dst, (v2i64 (X86vzmovl
                                                     (v2i64 VR128X:$src))))]>,
                                  EVEX, VEX_W;
}

let Predicates = [HasAVX512] in {
  def : Pat<(v4i32 (scalar_to_vector (i32 (anyext GR8:$src)))),
            (VMOVDI2PDIZrr (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
                                               GR8:$src, sub_8bit)))>;
  def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
            (VMOVDI2PDIZrr GR32:$src)>;

  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
            (VMOV64toPQIZrr GR64:$src)>;

  // AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
  def : Pat<(v4i32 (X86vzload32 addr:$src)),
            (VMOVDI2PDIZrm addr:$src)>;
  def : Pat<(v8i32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
  def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
            (VMOVZPQILo2PQIZrr VR128X:$src)>;
  def : Pat<(v2i64 (X86vzload64 addr:$src)),
            (VMOVQI2PQIZrm addr:$src)>;
  def : Pat<(v4i64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
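  // A minimal C sketch of the v2i64 X86vzmovl case above (assumption:
  // standard SSE2 intrinsic; expected to select VMOVZPQILo2PQIZrr):
  //   #include <immintrin.h>
  //   __m128i clear_high64(__m128i v) { return _mm_move_epi64(v); }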
  // Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
  def : Pat<(v16i32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
  def : Pat<(v8i64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;

  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2f64 (VMOVZPQILo2PQIZrr
                     (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))),
             sub_xmm)>;
  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2i64 (VMOVZPQILo2PQIZrr
                     (v2i64 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)))),
             sub_xmm)>;

  def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2f64 (VMOVZPQILo2PQIZrr
                     (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))),
             sub_xmm)>;
  def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2i64 (VMOVZPQILo2PQIZrr
                     (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)))),
             sub_xmm)>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - Non-temporals
//===----------------------------------------------------------------------===//

def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
                            (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
                            [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.ZMM.RM]>,
                            EVEX, T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>;

let Predicates = [HasVLX] in {
  def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
                                 (ins i256mem:$src),
                                 "vmovntdqa\t{$src, $dst|$dst, $src}",
                                 [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.YMM.RM]>,
                                 EVEX, T8PD, EVEX_V256, EVEX_CD8<64, CD8VF>;

  def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
                                 (ins i128mem:$src),
                                 "vmovntdqa\t{$src, $dst|$dst, $src}",
                                 [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.XMM.RM]>,
                                 EVEX, T8PD, EVEX_V128, EVEX_CD8<64, CD8VF>;
}

multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                        X86SchedWriteMoveLS Sched,
                        PatFrag st_frag = alignednontemporalstore> {
  let SchedRW = [Sched.MR], AddedComplexity = 400 in
  def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(st_frag (_.VT _.RC:$src), addr:$dst)],
                    _.ExeDomain>, EVEX, EVEX_CD8<_.EltSize, CD8VF>;
}

multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
                           AVX512VLVectorVTInfo VTInfo,
                           X86SchedWriteMoveLSWidths Sched> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512, Sched.ZMM>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256, Sched.YMM>, EVEX_V256;
    defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128, Sched.XMM>, EVEX_V128;
  }
}

defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info,
                                SchedWriteVecMoveLSNT>, PD;
defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info,
                                SchedWriteFMoveLSNT>, PD, VEX_W;
defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info,
                                SchedWriteFMoveLSNT>, PS;
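// A minimal C sketch of a non-temporal 512-bit store (assumption: AVX512F
// intrinsic; expected to select VMOVNTDQZmr via the patterns below):
//   #include <immintrin.h>
//   void stream512(void *p, __m512i v) { _mm512_stream_si512((__m512i *)p, v); }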
let Predicates = [HasAVX512], AddedComplexity = 400 in {
  def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
  def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
  def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;

  def : Pat<(v8f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v16f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v8i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v16i32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v32i16 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v64i8 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
}

let Predicates = [HasVLX], AddedComplexity = 400 in {
  def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignednontemporalstore (v16i16 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignednontemporalstore (v32i8 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;

  def : Pat<(v4f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v8f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v4i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v8i32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v16i16 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v32i8 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;

  def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignednontemporalstore (v8i16 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignednontemporalstore (v16i8 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;

  def : Pat<(v2f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v4f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v2i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v4i32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v8i16 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v16i8 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - Integer arithmetic
//
multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86VectorVTInfo _, X86FoldableSchedWrite sched,
                           bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
                            (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                            IsCommutable, IsCommutable>, AVX512BIBase, EVEX_4V,
                            Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                            (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
                            (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2)))>,
                            AVX512BIBase, EVEX_4V,
                            Sched<[sched.Folded, sched.ReadAfterFold]>;
}
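// AVX512_maskable expands each form above into unmasked, merge-masked ("k")
// and zero-masked ("kz") variants. A minimal C sketch of the merge-masked
// form (assumption: AVX512F intrinsic; expected to select VPADDDZrrk):
//   #include <immintrin.h>
//   __m512i add_masked(__m512i src, __mmask16 k, __m512i a, __m512i b) {
//     return _mm512_mask_add_epi32(src, k, a, b);
//   }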
multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
                            bit IsCommutable = 0> :
           avx512_binop_rm<opc, OpcodeStr, OpNode, _, sched, IsCommutable> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                             (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                             "${src2}"#_.BroadcastStr#", $src1",
                             "$src1, ${src2}"#_.BroadcastStr,
                             (_.VT (OpNode _.RC:$src1,
                                    (_.BroadcastLdFrag addr:$src2)))>,
                             AVX512BIBase, EVEX_4V, EVEX_B,
                             Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              AVX512VLVectorVTInfo VTInfo,
                              X86SchedWriteWidths sched, Predicate prd,
                              bit IsCommutable = 0> {
  let Predicates = [prd] in
  defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
                           IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256,
                                sched.YMM, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128,
                                sched.XMM, IsCommutable>, EVEX_V128;
  }
}

multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               AVX512VLVectorVTInfo VTInfo,
                               X86SchedWriteWidths sched, Predicate prd,
                               bit IsCommutable = 0> {
  let Predicates = [prd] in
  defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
                            IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256,
                                 sched.YMM, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128,
                                 sched.XMM, IsCommutable>, EVEX_V128;
  }
}

multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
                                  sched, prd, IsCommutable>,
                                  VEX_W, EVEX_CD8<64, CD8VF>;
}

multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
                                  sched, prd, IsCommutable>, EVEX_CD8<32, CD8VF>;
}

multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info,
                                 sched, prd, IsCommutable>, EVEX_CD8<16, CD8VF>,
                                 VEX_WIG;
}

multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info,
                                 sched, prd, IsCommutable>, EVEX_CD8<8, CD8VF>,
                                 VEX_WIG;
}

multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd, bit IsCommutable = 0> {
  defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, sched, prd,
                                IsCommutable>;

  defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, sched, prd,
                                IsCommutable>;
}
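// For example, the VPAND definition further below,
//   defm VPAND : avx512_binop_rm_vl_dq<0xDB, 0xDB, "vpand", and, ...>;
// concatenates NAME with "D"/"Q" and "Z"/"Z256"/"Z128", yielding records such
// as VPANDDZrr and VPANDQZ256rmb plus (via AVX512_maskable) their "k"/"kz"
// masked variants.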
multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd, bit IsCommutable = 0> {
  defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, sched, prd,
                                IsCommutable>;

  defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode, sched, prd,
                                IsCommutable>;
}

multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
                                  bits<8> opc_d, bits<8> opc_q,
                                  string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched,
                                  bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
                                    sched, HasAVX512, IsCommutable>,
              avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
                                    sched, HasBWI, IsCommutable>;
}

multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched,
                            SDNode OpNode,X86VectorVTInfo _Src,
                            X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct,
                            bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
                            "$src2, $src1","$src1, $src2",
                            (_Dst.VT (OpNode
                                      (_Src.VT _Src.RC:$src1),
                                      (_Src.VT _Src.RC:$src2))),
                            IsCommutable>,
                            AVX512BIBase, EVEX_4V, Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
                            (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
                                      (_Src.LdFrag addr:$src2)))>,
                            AVX512BIBase, EVEX_4V,
                            Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                             (ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
                             OpcodeStr,
                             "${src2}"#_Brdct.BroadcastStr#", $src1",
                             "$src1, ${src2}"#_Brdct.BroadcastStr,
                             (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
                                       (_Brdct.VT (_Brdct.BroadcastLdFrag addr:$src2)))))>,
                             AVX512BIBase, EVEX_4V, EVEX_B,
                             Sched<[sched.Folded, sched.ReadAfterFold]>;
}

defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
                                    SchedWriteVecALU, 1>;
defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
                                    SchedWriteVecALU, 0>;
defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", saddsat,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", ssubsat,
                                    SchedWriteVecALU, HasBWI, 0>;
defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", uaddsat,
                                     SchedWriteVecALU, HasBWI, 1>;
defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", usubsat,
                                     SchedWriteVecALU, HasBWI, 0>;
defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
                                    SchedWritePMULLD, HasAVX512, 1>, T8PD;
defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
                                    SchedWriteVecIMul, HasBWI, 1>;
defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
                                    SchedWriteVecIMul, HasDQI, 1>, T8PD,
                                    NotEVEX2VEXConvertible;
defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SchedWriteVecIMul,
                                    HasBWI, 1>;
defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SchedWriteVecIMul,
                                     HasBWI, 1>;
defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs,
                                      SchedWriteVecIMul, HasBWI, 1>, T8PD;
defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", avgceilu,
                                   SchedWriteVecALU, HasBWI, 1>;
defm VPMULDQ : avx512_binop_rm_vl_q<0x28, "vpmuldq", X86pmuldq,
                                    SchedWriteVecIMul, HasAVX512, 1>, T8PD;
defm VPMULUDQ : avx512_binop_rm_vl_q<0xF4, "vpmuludq", X86pmuludq,
                                     SchedWriteVecIMul, HasAVX512, 1>;

multiclass avx512_binop_all<bits<8> opc, string OpcodeStr,
                            X86SchedWriteWidths sched,
                            AVX512VLVectorVTInfo _SrcVTInfo,
                            AVX512VLVectorVTInfo _DstVTInfo,
                            SDNode OpNode, Predicate prd, bit IsCommutable = 0> {
  let Predicates = [prd] in
  defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
                                 _SrcVTInfo.info512, _DstVTInfo.info512,
                                 v8i64_info, IsCommutable>,
                                 EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
  let Predicates = [HasVLX, prd] in {
    defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
                                      _SrcVTInfo.info256, _DstVTInfo.info256,
                                      v4i64x_info, IsCommutable>,
                                      EVEX_V256, EVEX_CD8<64, CD8VF>, VEX_W;
    defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
                                      _SrcVTInfo.info128, _DstVTInfo.info128,
                                      v2i64x_info, IsCommutable>,
                                      EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_W;
  }
}

defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SchedWriteVecALU,
                                       avx512vl_i8_info, avx512vl_i8_info,
                                       X86multishift, HasVBMI, 0>, T8PD;

multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86VectorVTInfo _Src, X86VectorVTInfo _Dst,
                            X86FoldableSchedWrite sched> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                             (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
                             OpcodeStr,
                             "${src2}"#_Src.BroadcastStr#", $src1",
                             "$src1, ${src2}"#_Src.BroadcastStr,
                             (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
                                       (_Src.VT (_Src.BroadcastLdFrag addr:$src2)))))>,
                             EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>,
                             Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
                           SDNode OpNode,X86VectorVTInfo _Src,
                           X86VectorVTInfo _Dst, X86FoldableSchedWrite sched,
                           bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
                            "$src2, $src1","$src1, $src2",
                            (_Dst.VT (OpNode
                                      (_Src.VT _Src.RC:$src1),
                                      (_Src.VT _Src.RC:$src2))),
                            IsCommutable, IsCommutable>,
                            EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V, Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
                            (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
                                      (_Src.LdFrag addr:$src2)))>,
                            EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>,
                            Sched<[sched.Folded, sched.ReadAfterFold]>;
}
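// A minimal C sketch of a pack operation instantiated from these classes
// (assumption: AVX512BW intrinsic; expected to select VPACKSSDWZrr):
//   #include <immintrin.h>
//   __m512i pack_dw(__m512i a, __m512i b) { return _mm512_packs_epi32(a, b); }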
multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode> {
  let Predicates = [HasBWI] in
  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i32_info,
                                v32i16_info, SchedWriteShuffle.ZMM>,
                avx512_packs_rmb<opc, OpcodeStr, OpNode, v16i32_info,
                                 v32i16_info, SchedWriteShuffle.ZMM>, EVEX_V512;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i32x_info,
                                     v16i16x_info, SchedWriteShuffle.YMM>,
                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v8i32x_info,
                                      v16i16x_info, SchedWriteShuffle.YMM>,
                     EVEX_V256;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v4i32x_info,
                                     v8i16x_info, SchedWriteShuffle.XMM>,
                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v4i32x_info,
                                      v8i16x_info, SchedWriteShuffle.XMM>,
                     EVEX_V128;
  }
}
multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
                                   SDNode OpNode> {
  let Predicates = [HasBWI] in
  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info, v64i8_info,
                                SchedWriteShuffle.ZMM>, EVEX_V512, VEX_WIG;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info,
                                     v32i8x_info, SchedWriteShuffle.YMM>,
                     EVEX_V256, VEX_WIG;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info,
                                     v16i8x_info, SchedWriteShuffle.XMM>,
                     EVEX_V128, VEX_WIG;
  }
}

multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr,
                         SDNode OpNode, AVX512VLVectorVTInfo _Src,
                         AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> {
  let Predicates = [HasBWI] in
  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512,
                                _Dst.info512, SchedWriteVecIMul.ZMM,
                                IsCommutable>, EVEX_V512;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256,
                                     _Dst.info256, SchedWriteVecIMul.YMM,
                                     IsCommutable>, EVEX_V256;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128,
                                     _Dst.info128, SchedWriteVecIMul.XMM,
                                     IsCommutable>, EVEX_V128;
  }
}

defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, AVX512BIBase;
defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, AVX5128IBase;
defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase;
defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase;

defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
                                avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8PD, VEX_WIG;
defm VPMADDWD : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
                              avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase, VEX_WIG;

defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax,
                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMAXSD : avx512_binop_rm_vl_d<0x3D, "vpmaxsd", smax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
defm VPMAXSQ : avx512_binop_rm_vl_q<0x3D, "vpmaxsq", smax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
                                    NotEVEX2VEXConvertible;

defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax,
                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMAXUD : avx512_binop_rm_vl_d<0x3F, "vpmaxud", umax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
defm VPMAXUQ : avx512_binop_rm_vl_q<0x3F, "vpmaxuq", umax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
                                    NotEVEX2VEXConvertible;

defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin,
                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMINSD : avx512_binop_rm_vl_d<0x39, "vpminsd", smin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
defm VPMINSQ : avx512_binop_rm_vl_q<0x39, "vpminsq", smin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
                                    NotEVEX2VEXConvertible;
defm VPMINUQ : avx512_binop_rm_vl_q<0x3B, "vpminuq", umin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
                                    NotEVEX2VEXConvertible;

// PMULLQ: Use the 512-bit version to implement the 128/256-bit forms when
// VLX is not available.
let Predicates = [HasDQI, NoVLX] in {
  def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG
                (VPMULLQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
                sub_ymm)>;
  def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
            (EXTRACT_SUBREG
                (VPMULLQZrmb
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    addr:$src2),
                sub_ymm)>;

  def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG
                (VPMULLQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
                sub_xmm)>;
  def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
            (EXTRACT_SUBREG
                (VPMULLQZrmb
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    addr:$src2),
                sub_xmm)>;
}

multiclass avx512_min_max_lowering<string Instr, SDNode OpNode> {
  def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)),
            (EXTRACT_SUBREG
                (!cast<Instruction>(Instr#"rr")
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
                sub_ymm)>;
  def : Pat<(v4i64 (OpNode (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
            (EXTRACT_SUBREG
                (!cast<Instruction>(Instr#"rmb")
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    addr:$src2),
                sub_ymm)>;

  def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)),
            (EXTRACT_SUBREG
                (!cast<Instruction>(Instr#"rr")
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
                sub_xmm)>;
  def : Pat<(v2i64 (OpNode (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
            (EXTRACT_SUBREG
                (!cast<Instruction>(Instr#"rmb")
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    addr:$src2),
                sub_xmm)>;
}

let Predicates = [HasAVX512, NoVLX] in {
  defm : avx512_min_max_lowering<"VPMAXUQZ", umax>;
  defm : avx512_min_max_lowering<"VPMINUQZ", umin>;
  defm : avx512_min_max_lowering<"VPMAXSQZ", smax>;
  defm : avx512_min_max_lowering<"VPMINSQZ", smin>;
}

//===----------------------------------------------------------------------===//
// AVX-512  Logical Instructions
//===----------------------------------------------------------------------===//

defm VPAND : avx512_binop_rm_vl_dq<0xDB, 0xDB, "vpand", and,
                                   SchedWriteVecLogic, HasAVX512, 1>;
defm VPOR : avx512_binop_rm_vl_dq<0xEB, 0xEB, "vpor", or,
                                  SchedWriteVecLogic, HasAVX512, 1>;
defm VPXOR : avx512_binop_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
                                   SchedWriteVecLogic, HasAVX512, 1>;
defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
                                    SchedWriteVecLogic, HasAVX512>;
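
// Note: there are no byte/word flavors of the EVEX logic opcodes. Since
// bitwise operations are element-size agnostic when unmasked, the patterns
// below simply reuse the 64-bit-element "Q" forms for the byte/word vector
// types.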
let Predicates = [HasVLX] in {
  def : Pat<(v16i8 (and VR128X:$src1, VR128X:$src2)),
            (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (and VR128X:$src1, VR128X:$src2)),
            (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(v16i8 (or VR128X:$src1, VR128X:$src2)),
            (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (or VR128X:$src1, VR128X:$src2)),
            (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(v16i8 (xor VR128X:$src1, VR128X:$src2)),
            (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (xor VR128X:$src1, VR128X:$src2)),
            (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(v16i8 (X86andnp VR128X:$src1, VR128X:$src2)),
            (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (X86andnp VR128X:$src1, VR128X:$src2)),
            (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(and VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(and VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPANDQZ128rm VR128X:$src1, addr:$src2)>;

  def : Pat<(or VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPORQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(or VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPORQZ128rm VR128X:$src1, addr:$src2)>;

  def : Pat<(xor VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(xor VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPXORQZ128rm VR128X:$src1, addr:$src2)>;

  def : Pat<(X86andnp VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;

  def : Pat<(v32i8 (and VR256X:$src1, VR256X:$src2)),
            (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (and VR256X:$src1, VR256X:$src2)),
            (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;

  def : Pat<(v32i8 (or VR256X:$src1, VR256X:$src2)),
            (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (or VR256X:$src1, VR256X:$src2)),
            (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;

  def : Pat<(v32i8 (xor VR256X:$src1, VR256X:$src2)),
            (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (xor VR256X:$src1, VR256X:$src2)),
            (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;

  def : Pat<(v32i8 (X86andnp VR256X:$src1, VR256X:$src2)),
            (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (X86andnp VR256X:$src1, VR256X:$src2)),
            (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;

  def : Pat<(and VR256X:$src1, (loadv32i8 addr:$src2)),
            (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(and VR256X:$src1, (loadv16i16 addr:$src2)),
            (VPANDQZ256rm VR256X:$src1, addr:$src2)>;

  def : Pat<(or VR256X:$src1, (loadv32i8 addr:$src2)),
            (VPORQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(or VR256X:$src1, (loadv16i16 addr:$src2)),
            (VPORQZ256rm VR256X:$src1, addr:$src2)>;

  def : Pat<(xor VR256X:$src1, (loadv32i8 addr:$src2)),
            (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(xor VR256X:$src1, (loadv16i16 addr:$src2)),
            (VPXORQZ256rm VR256X:$src1, addr:$src2)>;

  def : Pat<(X86andnp VR256X:$src1, (loadv32i8 addr:$src2)),
            (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR256X:$src1, (loadv16i16 addr:$src2)),
            (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
}
let Predicates = [HasAVX512] in {
  def : Pat<(v64i8 (and VR512:$src1, VR512:$src2)),
            (VPANDQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v32i16 (and VR512:$src1, VR512:$src2)),
            (VPANDQZrr VR512:$src1, VR512:$src2)>;

  def : Pat<(v64i8 (or VR512:$src1, VR512:$src2)),
            (VPORQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v32i16 (or VR512:$src1, VR512:$src2)),
            (VPORQZrr VR512:$src1, VR512:$src2)>;

  def : Pat<(v64i8 (xor VR512:$src1, VR512:$src2)),
            (VPXORQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v32i16 (xor VR512:$src1, VR512:$src2)),
            (VPXORQZrr VR512:$src1, VR512:$src2)>;

  def : Pat<(v64i8 (X86andnp VR512:$src1, VR512:$src2)),
            (VPANDNQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v32i16 (X86andnp VR512:$src1, VR512:$src2)),
            (VPANDNQZrr VR512:$src1, VR512:$src2)>;

  def : Pat<(and VR512:$src1, (loadv64i8 addr:$src2)),
            (VPANDQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(and VR512:$src1, (loadv32i16 addr:$src2)),
            (VPANDQZrm VR512:$src1, addr:$src2)>;

  def : Pat<(or VR512:$src1, (loadv64i8 addr:$src2)),
            (VPORQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(or VR512:$src1, (loadv32i16 addr:$src2)),
            (VPORQZrm VR512:$src1, addr:$src2)>;

  def : Pat<(xor VR512:$src1, (loadv64i8 addr:$src2)),
            (VPXORQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(xor VR512:$src1, (loadv32i16 addr:$src2)),
            (VPXORQZrm VR512:$src1, addr:$src2)>;

  def : Pat<(X86andnp VR512:$src1, (loadv64i8 addr:$src2)),
            (VPANDNQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR512:$src1, (loadv32i16 addr:$src2)),
            (VPANDNQZrm VR512:$src1, addr:$src2)>;
}

// Patterns to catch a vselect with a different type than the logic op.
multiclass avx512_logical_lowering<string InstrStr, SDNode OpNode,
                                   X86VectorVTInfo _,
                                   X86VectorVTInfo IntInfo> {
  // Masked register-register logical operations.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
             _.RC:$src1, _.RC:$src2)>;

  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
             _.RC:$src2)>;

  // Masked register-memory logical operations.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
                                            (load addr:$src2)))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
             _.RC:$src1, addr:$src2)>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
                                            (load addr:$src2)))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1,
             addr:$src2)>;
}
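
// For example (illustrative only): instantiated with InstrStr = "VPANDQZ",
// a v8i64 select type, and a v16i32 logic type, the first pattern above folds
//   (v8i64 (vselect_mask $mask, (bitconvert (v16i32 (and ...))), $src0))
// into a single masked VPANDQZrrk.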
multiclass avx512_logical_lowering_bcast<string InstrStr, SDNode OpNode,
                                         X86VectorVTInfo _,
                                         X86VectorVTInfo IntInfo> {
  // Register-broadcast logical operations.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert
                    (IntInfo.VT (OpNode _.RC:$src1,
                                 (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
             _.RC:$src1, addr:$src2)>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert
                    (IntInfo.VT (OpNode _.RC:$src1,
                                 (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rmbkz) _.KRCWM:$mask,
             _.RC:$src1, addr:$src2)>;
}

multiclass avx512_logical_lowering_sizes<string InstrStr, SDNode OpNode,
                                         AVX512VLVectorVTInfo SelectInfo,
                                         AVX512VLVectorVTInfo IntInfo> {
let Predicates = [HasVLX] in {
  defm : avx512_logical_lowering<InstrStr#"Z128", OpNode, SelectInfo.info128,
                                 IntInfo.info128>;
  defm : avx512_logical_lowering<InstrStr#"Z256", OpNode, SelectInfo.info256,
                                 IntInfo.info256>;
}
let Predicates = [HasAVX512] in {
  defm : avx512_logical_lowering<InstrStr#"Z", OpNode, SelectInfo.info512,
                                 IntInfo.info512>;
}
}

multiclass avx512_logical_lowering_sizes_bcast<string InstrStr, SDNode OpNode,
                                               AVX512VLVectorVTInfo SelectInfo,
                                               AVX512VLVectorVTInfo IntInfo> {
let Predicates = [HasVLX] in {
  defm : avx512_logical_lowering_bcast<InstrStr#"Z128", OpNode,
                                       SelectInfo.info128, IntInfo.info128>;
  defm : avx512_logical_lowering_bcast<InstrStr#"Z256", OpNode,
                                       SelectInfo.info256, IntInfo.info256>;
}
let Predicates = [HasAVX512] in {
  defm : avx512_logical_lowering_bcast<InstrStr#"Z", OpNode,
                                       SelectInfo.info512, IntInfo.info512>;
}
}

multiclass avx512_logical_lowering_types<string InstrStr, SDNode OpNode> {
  // i64 vselect with i32/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
                                       avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
                                       avx512vl_i8_info>;

  // i32 vselect with i64/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
                                       avx512vl_i64_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
                                       avx512vl_i8_info>;

  // f32 vselect with i64/i32/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i64_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i8_info>;

  // f64 vselect with i64/i32/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i64_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i8_info>;

  defm : avx512_logical_lowering_sizes_bcast<InstrStr#"D", OpNode,
                                             avx512vl_f32_info,
                                             avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes_bcast<InstrStr#"Q", OpNode,
                                             avx512vl_f64_info,
                                             avx512vl_i64_info>;
}
defm : avx512_logical_lowering_types<"VPAND", and>;
defm : avx512_logical_lowering_types<"VPOR", or>;
defm : avx512_logical_lowering_types<"VPXOR", xor>;
defm : avx512_logical_lowering_types<"VPANDN", X86andnp>;

//===----------------------------------------------------------------------===//
// AVX-512  FP arithmetic
//===----------------------------------------------------------------------===//

multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                            SDPatternOperator OpNode, SDNode VecNode,
                            X86FoldableSchedWrite sched, bit IsCommutable> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
                           Sched<[sched]>;

  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (_.VT (VecNode _.RC:$src1,
                                  (_.ScalarIntMemFrags addr:$src2)))>,
                           Sched<[sched.Folded, sched.ReadAfterFold]>;
  let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
              (ins _.FRC:$src1, _.FRC:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
              Sched<[sched]> {
    let isCommutable = IsCommutable;
  }
  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
              (ins _.FRC:$src1, _.ScalarMemOp:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                                 (_.ScalarLdFrag addr:$src2)))]>,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
  }
}

multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                  SDNode VecNode, X86FoldableSchedWrite sched> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
                           "$rc, $src2, $src1", "$src1, $src2, $rc",
                           (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                    (i32 timm:$rc))>,
                           EVEX_B, EVEX_RC, Sched<[sched]>;
}
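
// Illustrative: the rrb_Int form above is the EVEX embedded-rounding
// encoding, e.g. in AT&T syntax
//   vaddss {rd-sae}, %xmm2, %xmm1, %xmm0
// where AVX512RC:$rc carries the static rounding mode; it is typically
// reached through the explicit-rounding intrinsics.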
multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                SDNode OpNode, SDNode VecNode, SDNode SaeNode,
                                X86FoldableSchedWrite sched, bit IsCommutable,
                                string EVEX2VexOvrd> {
  let ExeDomain = _.ExeDomain in {
  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
                           Sched<[sched]>, SIMD_EXC;

  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (_.VT (VecNode _.RC:$src1,
                                  (_.ScalarIntMemFrags addr:$src2)))>,
                           Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;

  let isCodeGenOnly = 1, Predicates = [HasAVX512],
      Uses = [MXCSR], mayRaiseFPException = 1 in {
  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
              (ins _.FRC:$src1, _.FRC:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
              Sched<[sched]>,
              EVEX2VEXOverride<EVEX2VexOvrd#"rr"> {
    let isCommutable = IsCommutable;
  }
  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
              (ins _.FRC:$src1, _.ScalarMemOp:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                                 (_.ScalarLdFrag addr:$src2)))]>,
              Sched<[sched.Folded, sched.ReadAfterFold]>,
              EVEX2VEXOverride<EVEX2VexOvrd#"rm">;
  }

  let Uses = [MXCSR] in
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                           "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                           (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
                           EVEX_B, Sched<[sched]>;
  }
}

multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                                SDNode VecNode, SDNode RndNode,
                                X86SchedWriteSizes sched, bit IsCommutable> {
  defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
                              sched.PS.Scl, IsCommutable>,
             avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, RndNode,
                                    sched.PS.Scl>,
             XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
  defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
                              sched.PD.Scl, IsCommutable>,
             avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, RndNode,
                                    sched.PD.Scl>,
             XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
  let Predicates = [HasFP16] in
    defm SHZ : avx512_fp_scalar<opc, OpcodeStr#"sh", f16x_info, OpNode,
                                VecNode, sched.PH.Scl, IsCommutable>,
               avx512_fp_scalar_round<opc, OpcodeStr#"sh", f16x_info, RndNode,
                                      sched.PH.Scl>,
               T_MAP5XS, EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>;
}

multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDNode VecNode, SDNode SaeNode,
                              X86SchedWriteSizes sched, bit IsCommutable> {
  defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode,
                                  VecNode, SaeNode, sched.PS.Scl, IsCommutable,
                                  NAME#"SS">,
             XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
  defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode,
                                  VecNode, SaeNode, sched.PD.Scl, IsCommutable,
                                  NAME#"SD">,
             XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
  let Predicates = [HasFP16] in {
    defm SHZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sh", f16x_info, OpNode,
                                    VecNode, SaeNode, sched.PH.Scl, IsCommutable,
                                    NAME#"SH">,
               T_MAP5XS, EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>,
               NotEVEX2VEXConvertible;
  }
}
defm VADD : avx512_binop_s_round<0x58, "vadd", any_fadd, X86fadds, X86faddRnds,
                                 SchedWriteFAddSizes, 1>;
defm VMUL : avx512_binop_s_round<0x59, "vmul", any_fmul, X86fmuls, X86fmulRnds,
                                 SchedWriteFMulSizes, 1>;
defm VSUB : avx512_binop_s_round<0x5C, "vsub", any_fsub, X86fsubs, X86fsubRnds,
                                 SchedWriteFAddSizes, 0>;
defm VDIV : avx512_binop_s_round<0x5E, "vdiv", any_fdiv, X86fdivs, X86fdivRnds,
                                 SchedWriteFDivSizes, 0>;
defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminSAEs,
                               SchedWriteFCmpSizes, 0>;
defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxSAEs,
                               SchedWriteFCmpSizes, 0>;
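
// Illustrative: the SAE ("suppress all exceptions") variants registered
// through avx512_binop_s_sae appear in assembly as, e.g.,
//   vminss {sae}, %xmm2, %xmm1, %xmm0
// MIN/MAX take {sae} rather than a rounding mode because they never round.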
// MIN/MAX nodes are commutable under "unsafe-fp-math". In that case we use
// X86fminc and X86fmaxc instead of X86fmin and X86fmax.
multiclass avx512_commutable_binop_s<bits<8> opc, string OpcodeStr,
                                     X86VectorVTInfo _, SDNode OpNode,
                                     X86FoldableSchedWrite sched,
                                     string EVEX2VEXOvrd> {
  let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
              (ins _.FRC:$src1, _.FRC:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
              Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr"> {
    let isCommutable = 1;
  }
  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
              (ins _.FRC:$src1, _.ScalarMemOp:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                                 (_.ScalarLdFrag addr:$src2)))]>,
              Sched<[sched.Folded, sched.ReadAfterFold]>,
              EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
  }
}
defm VMINCSSZ : avx512_commutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
                                          SchedWriteFCmp.Scl, "VMINCSS">, XS,
                                          EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;

defm VMINCSDZ : avx512_commutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
                                          SchedWriteFCmp.Scl, "VMINCSD">, XD,
                                          VEX_W, EVEX_4V, VEX_LIG,
                                          EVEX_CD8<64, CD8VT1>, SIMD_EXC;

defm VMAXCSSZ : avx512_commutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
                                          SchedWriteFCmp.Scl, "VMAXCSS">, XS,
                                          EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;

defm VMAXCSDZ : avx512_commutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
                                          SchedWriteFCmp.Scl, "VMAXCSD">, XD,
                                          VEX_W, EVEX_4V, VEX_LIG,
                                          EVEX_CD8<64, CD8VT1>, SIMD_EXC;

defm VMINCSHZ : avx512_commutable_binop_s<0x5D, "vminsh", f16x_info, X86fminc,
                                          SchedWriteFCmp.Scl, "VMINCSH">, T_MAP5XS,
                                          EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC,
                                          NotEVEX2VEXConvertible;
defm VMAXCSHZ : avx512_commutable_binop_s<0x5F, "vmaxsh", f16x_info, X86fmaxc,
                                          SchedWriteFCmp.Scl, "VMAXCSH">, T_MAP5XS,
                                          EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC,
                                          NotEVEX2VEXConvertible;
${src2}"#_.BroadcastStr, 5830 (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))), 5831 (MaskOpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))), 5832 ClobberConstraint>, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 5833 } 5834 } 5835} 5836 5837multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr, 5838 SDPatternOperator OpNodeRnd, 5839 X86FoldableSchedWrite sched, X86VectorVTInfo _, 5840 string suffix = _.Suffix, 5841 string ClobberConstraint = ""> { 5842 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in 5843 defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 5844 (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr#suffix, 5845 "$rc, $src2, $src1", "$src1, $src2, $rc", 5846 (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 timm:$rc))), 5847 0, 0, 0, vselect_mask, ClobberConstraint>, 5848 EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>; 5849} 5850 5851multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr, 5852 SDPatternOperator OpNodeSAE, 5853 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 5854 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in 5855 defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 5856 (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix, 5857 "{sae}, $src2, $src1", "$src1, $src2, {sae}", 5858 (_.VT (OpNodeSAE _.RC:$src1, _.RC:$src2))>, 5859 EVEX_4V, EVEX_B, Sched<[sched]>; 5860} 5861 5862multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 5863 SDPatternOperator MaskOpNode, 5864 Predicate prd, X86SchedWriteSizes sched, 5865 bit IsCommutable = 0, 5866 bit IsPD128Commutable = IsCommutable> { 5867 let Predicates = [prd] in { 5868 defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f32_info, 5869 sched.PS.ZMM, IsCommutable>, EVEX_V512, PS, 5870 EVEX_CD8<32, CD8VF>; 5871 defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f64_info, 5872 sched.PD.ZMM, IsCommutable>, EVEX_V512, PD, VEX_W, 5873 EVEX_CD8<64, CD8VF>; 5874 } 5875 5876 // Define only if AVX512VL feature is present. 
multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                             SDPatternOperator MaskOpNode,
                             Predicate prd, X86SchedWriteSizes sched,
                             bit IsCommutable = 0,
                             bit IsPD128Commutable = IsCommutable> {
  let Predicates = [prd] in {
  defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f32_info,
                              sched.PS.ZMM, IsCommutable>, EVEX_V512, PS,
                              EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f64_info,
                              sched.PD.ZMM, IsCommutable>, EVEX_V512, PD, VEX_W,
                              EVEX_CD8<64, CD8VF>;
  }

  // Define only if AVX512VL feature is present.
  let Predicates = [prd, HasVLX] in {
  defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f32x_info,
                                 sched.PS.XMM, IsCommutable>, EVEX_V128, PS,
                                 EVEX_CD8<32, CD8VF>;
  defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f32x_info,
                                 sched.PS.YMM, IsCommutable>, EVEX_V256, PS,
                                 EVEX_CD8<32, CD8VF>;
  defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v2f64x_info,
                                 sched.PD.XMM, IsPD128Commutable,
                                 IsCommutable>, EVEX_V128, PD, VEX_W,
                                 EVEX_CD8<64, CD8VF>;
  defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f64x_info,
                                 sched.PD.YMM, IsCommutable>, EVEX_V256, PD, VEX_W,
                                 EVEX_CD8<64, CD8VF>;
  }
}

multiclass avx512_fp_binop_ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                              SDPatternOperator MaskOpNode,
                              X86SchedWriteSizes sched, bit IsCommutable = 0> {
  let Predicates = [HasFP16] in {
  defm PHZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v32f16_info,
                              sched.PH.ZMM, IsCommutable>, EVEX_V512, T_MAP5PS,
                              EVEX_CD8<16, CD8VF>;
  }
  let Predicates = [HasVLX, HasFP16] in {
  defm PHZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f16x_info,
                                 sched.PH.XMM, IsCommutable>, EVEX_V128, T_MAP5PS,
                                 EVEX_CD8<16, CD8VF>;
  defm PHZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f16x_info,
                                 sched.PH.YMM, IsCommutable>, EVEX_V256, T_MAP5PS,
                                 EVEX_CD8<16, CD8VF>;
  }
}

let Uses = [MXCSR] in
multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
                                   X86SchedWriteSizes sched> {
  let Predicates = [HasFP16] in {
  defm PHZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PH.ZMM,
                                    v32f16_info>,
                                    EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>;
  }
  defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
                                    v16f32_info>,
                                    EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
                                    v8f64_info>,
                                    EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
}

let Uses = [MXCSR] in
multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
                                 X86SchedWriteSizes sched> {
  let Predicates = [HasFP16] in {
  defm PHZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PH.ZMM,
                                  v32f16_info>,
                                  EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>;
  }
  defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
                                  v16f32_info>,
                                  EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
                                  v8f64_info>,
                                  EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
}
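
// Each defm below fans out to per-width, per-type instructions; e.g. VADD
// yields VADDPSZrr/VADDPSZ256rr/VADDPSZ128rr, the PD (and, with FP16, PH)
// siblings, their masked and broadcast memory forms, and the 512-bit
// embedded-rounding variants.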
defm VADD : avx512_fp_binop_p<0x58, "vadd", any_fadd, fadd, HasAVX512,
                              SchedWriteFAddSizes, 1>,
            avx512_fp_binop_ph<0x58, "vadd", any_fadd, fadd, SchedWriteFAddSizes, 1>,
            avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>;
defm VMUL : avx512_fp_binop_p<0x59, "vmul", any_fmul, fmul, HasAVX512,
                              SchedWriteFMulSizes, 1>,
            avx512_fp_binop_ph<0x59, "vmul", any_fmul, fmul, SchedWriteFMulSizes, 1>,
            avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>;
defm VSUB : avx512_fp_binop_p<0x5C, "vsub", any_fsub, fsub, HasAVX512,
                              SchedWriteFAddSizes>,
            avx512_fp_binop_ph<0x5C, "vsub", any_fsub, fsub, SchedWriteFAddSizes>,
            avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>;
defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", any_fdiv, fdiv, HasAVX512,
                              SchedWriteFDivSizes>,
            avx512_fp_binop_ph<0x5E, "vdiv", any_fdiv, fdiv, SchedWriteFDivSizes>,
            avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>;
defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, X86fmin, HasAVX512,
                              SchedWriteFCmpSizes, 0>,
            avx512_fp_binop_ph<0x5D, "vmin", X86fmin, X86fmin, SchedWriteFCmpSizes, 0>,
            avx512_fp_binop_p_sae<0x5D, "vmin", X86fminSAE, SchedWriteFCmpSizes>;
defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, X86fmax, HasAVX512,
                              SchedWriteFCmpSizes, 0>,
            avx512_fp_binop_ph<0x5F, "vmax", X86fmax, X86fmax, SchedWriteFCmpSizes, 0>,
            avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxSAE, SchedWriteFCmpSizes>;
let isCodeGenOnly = 1 in {
  defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, X86fminc, HasAVX512,
                                 SchedWriteFCmpSizes, 1>,
               avx512_fp_binop_ph<0x5D, "vmin", X86fminc, X86fminc,
                                  SchedWriteFCmpSizes, 1>;
  defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, X86fmaxc, HasAVX512,
                                 SchedWriteFCmpSizes, 1>,
               avx512_fp_binop_ph<0x5F, "vmax", X86fmaxc, X86fmaxc,
                                  SchedWriteFCmpSizes, 1>;
}
let Uses = []<Register>, mayRaiseFPException = 0 in {
defm VAND : avx512_fp_binop_p<0x54, "vand", null_frag, null_frag, HasDQI,
                              SchedWriteFLogicSizes, 1>;
defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, null_frag, HasDQI,
                               SchedWriteFLogicSizes, 0>;
defm VOR : avx512_fp_binop_p<0x56, "vor", null_frag, null_frag, HasDQI,
                             SchedWriteFLogicSizes, 1>;
defm VXOR : avx512_fp_binop_p<0x57, "vxor", null_frag, null_frag, HasDQI,
                              SchedWriteFLogicSizes, 1>;
}

multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
                  EVEX_4V, Sched<[sched]>;
  defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
                  EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#_.Suffix,
                  "${src2}"#_.BroadcastStr#", $src1",
                  "$src1, ${src2}"#_.BroadcastStr,
                  (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>,
                  EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
                  Sched<[sched]>;
  defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr#_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (OpNode _.RC:$src1, (_.ScalarIntMemFrags addr:$src2))>,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr,
                                X86SchedWriteWidths sched> {
  let Predicates = [HasFP16] in {
    defm PHZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v32f16_info>,
               avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v32f16_info>,
               EVEX_V512, T_MAP6PD, EVEX_CD8<16, CD8VF>;
    defm SHZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f16x_info>,
               avx512_fp_scalar_round<opcScaler, OpcodeStr#"sh", f16x_info, X86scalefsRnd, sched.Scl>,
               EVEX_4V, T_MAP6PD, EVEX_CD8<16, CD8VT1>;
  }
  defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v16f32_info>,
             avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v16f32_info>,
             EVEX_V512, EVEX_CD8<32, CD8VF>, T8PD;
  defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v8f64_info>,
             avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v8f64_info>,
             EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>, T8PD;
  defm SSZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f32x_info>,
             avx512_fp_scalar_round<opcScaler, OpcodeStr#"ss", f32x_info,
                                    X86scalefsRnd, sched.Scl>,
             EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, T8PD;
  defm SDZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f64x_info>,
             avx512_fp_scalar_round<opcScaler, OpcodeStr#"sd", f64x_info,
                                    X86scalefsRnd, sched.Scl>,
             EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>, VEX_W, T8PD;

  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v4f32x_info>,
                  EVEX_V128, EVEX_CD8<32, CD8VF>, T8PD;
    defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v8f32x_info>,
                  EVEX_V256, EVEX_CD8<32, CD8VF>, T8PD;
    defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v2f64x_info>,
                  EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>, T8PD;
    defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v4f64x_info>,
                  EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>, T8PD;
  }

  let Predicates = [HasFP16, HasVLX] in {
    defm PHZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v8f16x_info>,
                  EVEX_V128, EVEX_CD8<16, CD8VF>, T_MAP6PD;
    defm PHZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v16f16x_info>,
                  EVEX_V256, EVEX_CD8<16, CD8VF>, T_MAP6PD;
  }
}
defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef",
                                    SchedWriteFAdd>, NotEVEX2VEXConvertible;
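
// Semantics note on the VSCALEF definitions above (per the ISA reference):
// vscalef computes src1 * 2^floor(src2) per element.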
//===----------------------------------------------------------------------===//
// AVX-512  VPTESTM instructions
//===----------------------------------------------------------------------===//

multiclass avx512_vptest<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  // NOTE: Patterns are omitted in favor of manual selection in X86ISelDAGToDAG.
  // There are just too many permutations due to commutability and bitcasts.
  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
                     (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                     "$src2, $src1", "$src1, $src2",
                     (null_frag), (null_frag), 1>,
                     EVEX_4V, Sched<[sched]>;
  let mayLoad = 1 in
  defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
                     (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                     "$src2, $src1", "$src1, $src2",
                     (null_frag), (null_frag)>,
                     EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                     Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
  defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
                     (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                     "${src2}"#_.BroadcastStr#", $src1",
                     "$src1, ${src2}"#_.BroadcastStr,
                     (null_frag), (null_frag)>,
                     EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                     Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr,
                                  X86SchedWriteWidths sched,
                                  AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in
    defm Z : avx512_vptest<opc, OpcodeStr, sched.ZMM, _.info512>,
             avx512_vptest_mb<opc, OpcodeStr, sched.ZMM, _.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_vptest<opc, OpcodeStr, sched.YMM, _.info256>,
                avx512_vptest_mb<opc, OpcodeStr, sched.YMM, _.info256>, EVEX_V256;
    defm Z128 : avx512_vptest<opc, OpcodeStr, sched.XMM, _.info128>,
                avx512_vptest_mb<opc, OpcodeStr, sched.XMM, _.info128>, EVEX_V128;
  }
}

multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr,
                            X86SchedWriteWidths sched> {
  defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", sched,
                                  avx512vl_i32_info>;
  defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", sched,
                                  avx512vl_i64_info>, VEX_W;
}

multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
                            X86SchedWriteWidths sched> {
  let Predicates = [HasBWI] in {
    defm WZ: avx512_vptest<opc, OpcodeStr#"w", sched.ZMM,
                           v32i16_info>, EVEX_V512, VEX_W;
    defm BZ: avx512_vptest<opc, OpcodeStr#"b", sched.ZMM,
                           v64i8_info>, EVEX_V512;
  }

  let Predicates = [HasVLX, HasBWI] in {
    defm WZ256: avx512_vptest<opc, OpcodeStr#"w", sched.YMM,
                              v16i16x_info>, EVEX_V256, VEX_W;
    defm WZ128: avx512_vptest<opc, OpcodeStr#"w", sched.XMM,
                              v8i16x_info>, EVEX_V128, VEX_W;
    defm BZ256: avx512_vptest<opc, OpcodeStr#"b", sched.YMM,
                              v32i8x_info>, EVEX_V256;
    defm BZ128: avx512_vptest<opc, OpcodeStr#"b", sched.XMM,
                              v16i8x_info>, EVEX_V128;
  }
}

multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr,
                                   X86SchedWriteWidths sched> :
  avx512_vptest_wb<opc_wb, OpcodeStr, sched>,
  avx512_vptest_dq<opc_dq, OpcodeStr, sched>;

defm VPTESTM : avx512_vptest_all_forms<0x26, 0x27, "vptestm",
                                       SchedWriteVecLogic>, T8PD;
defm VPTESTNM : avx512_vptest_all_forms<0x26, 0x27, "vptestnm",
                                        SchedWriteVecLogic>, T8XS;
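
// Illustrative semantics: VPTESTM sets a mask bit where (src1 AND src2) is
// nonzero in that element, VPTESTNM where it is zero, e.g.
//   vptestmd %zmm1, %zmm0, %k1
// As noted above, these are selected manually in X86ISelDAGToDAG.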
//===----------------------------------------------------------------------===//
// AVX-512  Shift instructions
//===----------------------------------------------------------------------===//

multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
                            string OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, u8imm:$src2), OpcodeStr,
                   "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1, (i8 timm:$src2)))>,
                   Sched<[sched]>;
  defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
                   (ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
                   "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode (_.VT (_.LdFrag addr:$src1)),
                          (i8 timm:$src2)))>,
                   Sched<[sched.Folded]>;
  }
}

multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
                             string OpcodeStr, SDNode OpNode,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
                   (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
                   "$src2, ${src1}"#_.BroadcastStr, "${src1}"#_.BroadcastStr#", $src2",
                   (_.VT (OpNode (_.BroadcastLdFrag addr:$src1), (i8 timm:$src2)))>,
                   EVEX_B, Sched<[sched.Folded]>;
}

multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, ValueType SrcVT,
                            X86VectorVTInfo _> {
  // src2 is always 128-bit
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, VR128X:$src2), OpcodeStr,
                   "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2)))>,
                   AVX512BIBase, EVEX_4V, Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, i128mem:$src2), OpcodeStr,
                   "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1, (SrcVT (load addr:$src2))))>,
                   AVX512BIBase,
                   EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86SchedWriteWidths sched, ValueType SrcVT,
                              AVX512VLVectorVTInfo VTInfo,
                              Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.ZMM, SrcVT,
                              VTInfo.info512>, EVEX_V512,
                              EVEX_CD8<VTInfo.info512.EltSize, CD8VQ>;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.YMM, SrcVT,
                                 VTInfo.info256>, EVEX_V256,
                                 EVEX_CD8<VTInfo.info256.EltSize, CD8VH>;
    defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.XMM, SrcVT,
                                 VTInfo.info128>, EVEX_V128,
                                 EVEX_CD8<VTInfo.info128.EltSize, CD8VF>;
  }
}

multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
                              string OpcodeStr, SDNode OpNode,
                              X86SchedWriteWidths sched,
                              bit NotEVEX2VEXConvertibleQ = 0> {
  defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, sched, v4i32,
                              avx512vl_i32_info, HasAVX512>;
  let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
    defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, sched, v2i64,
                                avx512vl_i64_info, HasAVX512>, VEX_W;
  defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, sched, v8i16,
                              avx512vl_i16_info, HasBWI>;
}
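
// Illustrative: the rrm forms above take a uniform shift count in the low
// scalar of an XMM register, e.g.
//   vpsllq %xmm2, %zmm1, %zmm0
// shifts all eight qwords of %zmm1 left by the count held in %xmm2.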
multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
                                  string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched,
                                  AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasAVX512] in
    defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                             sched.ZMM, VTInfo.info512>,
            avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.ZMM,
                              VTInfo.info512>, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                                sched.YMM, VTInfo.info256>,
               avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.YMM,
                                 VTInfo.info256>, EVEX_V256;
    defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                                sched.XMM, VTInfo.info128>,
               avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.XMM,
                                 VTInfo.info128>, EVEX_V128;
  }
}

multiclass avx512_shift_rmi_w<bits<8> opcw, Format ImmFormR, Format ImmFormM,
                              string OpcodeStr, SDNode OpNode,
                              X86SchedWriteWidths sched> {
  let Predicates = [HasBWI] in
    defm WZ: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              sched.ZMM, v32i16_info>, EVEX_V512, VEX_WIG;
  let Predicates = [HasVLX, HasBWI] in {
    defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                                 sched.YMM, v16i16x_info>, EVEX_V256, VEX_WIG;
    defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                                 sched.XMM, v8i16x_info>, EVEX_V128, VEX_WIG;
  }
}

multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq,
                               Format ImmFormR, Format ImmFormM,
                               string OpcodeStr, SDNode OpNode,
                               X86SchedWriteWidths sched,
                               bit NotEVEX2VEXConvertibleQ = 0> {
  defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode,
                                 sched, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
  let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
    defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode,
                                   sched, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
}

defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli,
                                 SchedWriteVecShiftImm>,
             avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli,
                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;

defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli,
                                 SchedWriteVecShiftImm>,
             avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli,
                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;

defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai,
                                 SchedWriteVecShiftImm, 1>,
             avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai,
                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;

defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri,
                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli,
                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;

defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl,
                                SchedWriteVecShift>;
defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra,
                                SchedWriteVecShift, 1>;
defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl,
                                SchedWriteVecShift>;
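
// Illustrative: each immediate-form defm above (the VPSRL/VPSLL/VPSRA ri
// forms and VPROR/VPROL) covers register, memory, and embedded-broadcast
// sources, e.g. for VPROL:
//   vprolq $1, %zmm1, %zmm0
//   vprolq $1, (%rax), %zmm0
//   vprolq $1, (%rax){1to8}, %zmm0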
// Use the 512-bit VPSRA/VPSRAI versions to implement v2i64/v4i64 when VLX is
// not available.
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                 VR128X:$src2)), sub_ymm)>;

  def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                 VR128X:$src2)), sub_xmm)>;

  def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                 timm:$src2)), sub_ymm)>;

  def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                 timm:$src2)), sub_xmm)>;
}

//===-------------------------------------------------------------------===//
// Variable Bit Shifts
//===-------------------------------------------------------------------===//

multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                   "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2)))>,
                   AVX5128IBase, EVEX_4V, Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                   "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1,
                          (_.VT (_.LdFrag addr:$src2))))>,
                   AVX5128IBase, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                   "${src2}"#_.BroadcastStr#", $src1",
                   "$src1, ${src2}"#_.BroadcastStr,
                   (_.VT (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))>,
                   AVX5128IBase, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in
    defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
             avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
                avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256;
    defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
                avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128;
  }
}

multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr,
                                  SDNode OpNode, X86SchedWriteWidths sched> {
  defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode, sched,
                                  avx512vl_i32_info>;
  defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode, sched,
                                  avx512vl_i64_info>, VEX_W;
}
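
// Illustrative: unlike the uniform-count shifts above, the variable forms
// defined here take a full vector of per-element counts, e.g.
//   vpsllvd %zmm2, %zmm1, %zmm0
// shifts each dword of %zmm1 left by the corresponding dword of %zmm2.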
// Use the 512-bit version to implement the 128/256-bit forms when VLX is not
// available.
multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr,
                                     SDNode OpNode, list<Predicate> p> {
  let Predicates = p in {
    def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1),
                                    (_.info256.VT _.info256.RC:$src2))),
              (EXTRACT_SUBREG
                  (!cast<Instruction>(OpcodeStr#"Zrr")
                      (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                      (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
                  sub_ymm)>;

    def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1),
                                    (_.info128.VT _.info128.RC:$src2))),
              (EXTRACT_SUBREG
                  (!cast<Instruction>(OpcodeStr#"Zrr")
                      (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                      (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
                  sub_xmm)>;
  }
}
multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
                              SDNode OpNode, X86SchedWriteWidths sched> {
  let Predicates = [HasBWI] in
    defm WZ: avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v32i16_info>,
             EVEX_V512, VEX_W;
  let Predicates = [HasVLX, HasBWI] in {
    defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v16i16x_info>,
                EVEX_V256, VEX_W;
    defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v8i16x_info>,
                EVEX_V128, VEX_W;
  }
}

defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", X86vshlv, SchedWriteVarVecShift>,
              avx512_var_shift_w<0x12, "vpsllvw", X86vshlv, SchedWriteVarVecShift>;

defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", X86vsrav, SchedWriteVarVecShift>,
              avx512_var_shift_w<0x11, "vpsravw", X86vsrav, SchedWriteVarVecShift>;

defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", X86vsrlv, SchedWriteVarVecShift>,
              avx512_var_shift_w<0x10, "vpsrlvw", X86vsrlv, SchedWriteVarVecShift>;

defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SchedWriteVarVecShift>;
defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SchedWriteVarVecShift>;

defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", X86vsrav, [HasAVX512, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", X86vshlv, [HasBWI, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", X86vsrav, [HasBWI, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", X86vsrlv, [HasBWI, NoVLX]>;
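
// For example (a sketch of the expansion, not an additional pattern): with
// BWI but no VLX, a v8i16 X86vsrav is rewritten by the lowering multiclass
// above into
//   (EXTRACT_SUBREG (VPSRAVWZrr (INSERT_SUBREG ...), (INSERT_SUBREG ...)),
//    sub_xmm)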
// Use the 512-bit VPROL/VPROLI versions to implement v2i64/v4i64 and
// v4i32/v8i32 when VLX is not available.
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
             sub_xmm)>;
  def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
             sub_ymm)>;

  def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPROLVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
             sub_xmm)>;
  def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPROLVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
             sub_ymm)>;

  def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                timm:$src2)), sub_xmm)>;
  def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                timm:$src2)), sub_ymm)>;

  def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPROLDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                timm:$src2)), sub_xmm)>;
  def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPROLDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                timm:$src2)), sub_ymm)>;
}
// Use the 512-bit VPROR/VPRORI versions to implement v2i64/v4i64 and
// v4i32/v8i32 when VLX is not available.
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPRORVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
             sub_xmm)>;
  def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPRORVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
             sub_ymm)>;

  def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPRORVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
             sub_xmm)>;
  def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPRORVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
             sub_ymm)>;

  def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPRORQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                timm:$src2)), sub_xmm)>;
  def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPRORQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                timm:$src2)), sub_ymm)>;

  def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPRORDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                timm:$src2)), sub_xmm)>;
  def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPRORDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                timm:$src2)), sub_ymm)>;
}

//===-------------------------------------------------------------------===//
// 1-src variable permutation VPERMW/D/Q
//===-------------------------------------------------------------------===//

multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in
    defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
             avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in
    defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
                avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info256>, EVEX_V256;
}

multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
                                  string OpcodeStr, SDNode OpNode,
                                  X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasAVX512] in
    defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                             sched, VTInfo.info512>,
            avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
                              sched, VTInfo.info512>, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in
    defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                                sched, VTInfo.info256>,
               avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
                                 sched, VTInfo.info256>, EVEX_V256;
}
multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr,
                           Predicate prd, SDNode OpNode,
                           X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
  let Predicates = [prd] in
  defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
             EVEX_V512;
  let Predicates = [HasVLX, prd] in {
  defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
             EVEX_V256;
  defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info128>,
             EVEX_V128;
  }
}

defm VPERMW : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv,
                              WriteVarShuffle256, avx512vl_i16_info>, VEX_W;
defm VPERMB : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv,
                              WriteVarShuffle256, avx512vl_i8_info>;

defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv,
                                    WriteVarShuffle256, avx512vl_i32_info>;
defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv,
                                    WriteVarShuffle256, avx512vl_i64_info>, VEX_W;
defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv,
                                     WriteFVarShuffle256, avx512vl_f32_info>;
defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv,
                                     WriteFVarShuffle256, avx512vl_f64_info>, VEX_W;

defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq",
                                     X86VPermi, WriteShuffle256, avx512vl_i64_info>,
                                     EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd",
                                      X86VPermi, WriteFShuffle256, avx512vl_f64_info>,
                                      EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;

//===----------------------------------------------------------------------===//
// AVX-512 - VPERMIL
//===----------------------------------------------------------------------===//

multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _,
                             X86VectorVTInfo Ctrl> {
  defm rr: AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, Ctrl.RC:$src2), OpcodeStr,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1,
                                (Ctrl.VT Ctrl.RC:$src2)))>,
                  T8PD, EVEX_4V, Sched<[sched]>;
  defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode
                           _.RC:$src1,
                           (Ctrl.VT (Ctrl.LdFrag addr:$src2))))>,
                  T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                   "${src2}"#_.BroadcastStr#", $src1",
                   "$src1, ${src2}"#_.BroadcastStr,
                   (_.VT (OpNode
                            _.RC:$src1,
                            (Ctrl.VT (Ctrl.BroadcastLdFrag addr:$src2))))>,
                   T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
                                    X86SchedWriteWidths sched,
                                    AVX512VLVectorVTInfo _,
                                    AVX512VLVectorVTInfo Ctrl> {
  let Predicates = [HasAVX512] in {
    defm Z    : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.ZMM,
                                  _.info512, Ctrl.info512>, EVEX_V512;
  }
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.XMM,
                                  _.info128, Ctrl.info128>, EVEX_V128;
    defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.YMM,
                                  _.info256, Ctrl.info256>, EVEX_V256;
  }
}

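// VPERMILPS/VPERMILPD come in two forms: a variable form whose per-element
// control is taken from an integer vector, and an immediate form that reuses
// the shift-by-immediate multiclasses purely for encoding.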
multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
                         AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl> {
  defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, SchedWriteFVarShuffle,
                                      _, Ctrl>;
  defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
                                    X86VPermilpi, SchedWriteFShuffle, _>,
             EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
}

let ExeDomain = SSEPackedSingle in
defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info,
                               avx512vl_i32_info>;
let ExeDomain = SSEPackedDouble in
defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
                               avx512vl_i64_info>, VEX_W1X;

//===----------------------------------------------------------------------===//
// AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW
//===----------------------------------------------------------------------===//

defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
                                      X86PShufd, SchedWriteShuffle, avx512vl_i32_info>,
                                      EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;
defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
                                  X86PShufhw, SchedWriteShuffle>,
                                  EVEX, AVX512XSIi8Base;
defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
                                  X86PShuflw, SchedWriteShuffle>,
                                  EVEX, AVX512XDIi8Base;

//===----------------------------------------------------------------------===//
// AVX-512 - VPSHUFB
//===----------------------------------------------------------------------===//

multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86SchedWriteWidths sched> {
  let Predicates = [HasBWI] in
  defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v64i8_info>,
             EVEX_V512;

  let Predicates = [HasVLX, HasBWI] in {
  defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v32i8x_info>,
             EVEX_V256;
  defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v16i8x_info>,
             EVEX_V128;
  }
}

defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb,
                                  SchedWriteVarShuffle>, VEX_WIG;

//===----------------------------------------------------------------------===//
// Move Low to High and High to Low packed FP Instructions
//===----------------------------------------------------------------------===//

def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
                            (ins VR128X:$src1, VR128X:$src2),
                            "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                            [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))]>,
                            Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V;
let isCommutable = 1 in
def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
                            (ins VR128X:$src1, VR128X:$src2),
                            "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                            [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))]>,
                            Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V, NotMemoryFoldable;

//===----------------------------------------------------------------------===//
// VMOVHPS/PD VMOVLPS Instructions
// All patterns were taken from the SSE implementation.
//===----------------------------------------------------------------------===//

multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr,
                                  SDPatternOperator OpNode,
                                  X86VectorVTInfo _> {
  let hasSideEffects = 0, mayLoad = 1, ExeDomain = _.ExeDomain in
  def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst),
                  (ins _.RC:$src1, f64mem:$src2),
                  !strconcat(OpcodeStr,
                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set _.RC:$dst,
                        (OpNode _.RC:$src1,
                                (_.VT (bitconvert
                                       (v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))]>,
                  Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>, EVEX_4V;
}

// No patterns for MOVLPS/MOVHPS as the Movlhps node should only be created in
// SSE1, and the MOVLPS pattern is even more complex.
defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", null_frag,
                                          v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl,
                                          v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", null_frag,
                                          v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movsd,
                                          v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;

let Predicates = [HasAVX512] in {
  // VMOVHPD patterns
  def : Pat<(v2f64 (X86Unpckl VR128X:$src1, (X86vzload64 addr:$src2))),
            (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;

  // VMOVLPD patterns
  def : Pat<(v2f64 (X86Movsd VR128X:$src1, (X86vzload64 addr:$src2))),
            (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
}

let SchedRW = [WriteFStore] in {
let mayStore = 1, hasSideEffects = 0 in
def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs),
                              (ins f64mem:$dst, VR128X:$src),
                              "vmovhps\t{$src, $dst|$dst, $src}",
                              []>, EVEX, EVEX_CD8<32, CD8VT2>;
def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
                              (ins f64mem:$dst, VR128X:$src),
                              "vmovhpd\t{$src, $dst|$dst, $src}",
                              [(store (f64 (extractelt
                                            (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)),
                                            (iPTR 0))), addr:$dst)]>,
                              EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
let mayStore = 1, hasSideEffects = 0 in
def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs),
                              (ins f64mem:$dst, VR128X:$src),
                              "vmovlps\t{$src, $dst|$dst, $src}",
                              []>, EVEX, EVEX_CD8<32, CD8VT2>;
def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs),
                              (ins f64mem:$dst, VR128X:$src),
                              "vmovlpd\t{$src, $dst|$dst, $src}",
                              [(store (f64 (extractelt (v2f64 VR128X:$src),
                                            (iPTR 0))), addr:$dst)]>,
                              EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
} // SchedRW

let Predicates = [HasAVX512] in {
  // VMOVHPD patterns
  def : Pat<(store (f64 (extractelt
                         (v2f64 (X86VPermilpi VR128X:$src, (i8 1))),
                         (iPTR 0))), addr:$dst),
            (VMOVHPDZ128mr addr:$dst, VR128X:$src)>;
}
//===----------------------------------------------------------------------===//
// FMA - Fused Multiply Operations
//
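// The numeric suffix of an FMA3 mnemonic gives the operand order of the
// multiply-add; with $src1 tied to $dst:
//   vfmadd132: $dst = $src1 * $src3 + $src2
//   vfmadd213: $dst = $src2 * $src1 + $src3
//   vfmadd231: $dst = $src2 * $src3 + $src1
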
multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                               SDNode MaskOpNode, X86FoldableSchedWrite sched,
                               X86VectorVTInfo _> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
      Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)),
          (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>,
          EVEX_4V, Sched<[sched]>;

  defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))),
          (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>,
          EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.ScalarMemOp:$src3),
           OpcodeStr, !strconcat("${src3}", _.BroadcastStr, ", $src2"),
           !strconcat("$src2, ${src3}", _.BroadcastStr),
           (OpNode _.RC:$src2,
                   _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src3))),
           (MaskOpNode _.RC:$src2,
                       _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src3))), 1, 0>,
           EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched,
                                 X86VectorVTInfo _> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
      Uses = [MXCSR] in
  defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
           (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))),
           (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))), 1, 1>,
           EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
}

multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                                   SDNode MaskOpNode, SDNode OpNodeRnd,
                                   X86SchedWriteWidths sched,
                                   AVX512VLVectorVTInfo _,
                                   Predicate prd = HasAVX512> {
  let Predicates = [prd] in {
    defm Z : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                 sched.ZMM, _.info512>,
             avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
                                   _.info512>,
             EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, prd] in {
    defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.YMM, _.info256>,
                EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.XMM, _.info128>,
                EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}

multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                              SDNode MaskOpNode, SDNode OpNodeRnd> {
  defm PH : avx512_fma3p_213_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
                                    OpNodeRnd, SchedWriteFMA,
                                    avx512vl_f16_info, HasFP16>, T_MAP6PD;
  defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
                                    OpNodeRnd, SchedWriteFMA,
                                    avx512vl_f32_info>, T8PD;
  defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
                                    OpNodeRnd, SchedWriteFMA,
                                    avx512vl_f64_info>, T8PD, VEX_W;
}

defm VFMADD213    : avx512_fma3p_213_f<0xA8, "vfmadd213", any_fma,
                                       fma, X86FmaddRnd>;
defm VFMSUB213    : avx512_fma3p_213_f<0xAA, "vfmsub213", X86any_Fmsub,
                                       X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub,
                                       X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd,
                                       X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD213   : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86any_Fnmadd,
                                       X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB213   : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86any_Fnmsub,
                                       X86Fnmsub, X86FnmsubRnd>;

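// For the 231 and 132 forms the plain register pattern is (null_frag):
// unmasked register FMAs are matched by the 213 form and commuted to the
// other encodings later when that helps folding or register allocation.
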
multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                               SDNode MaskOpNode, X86FoldableSchedWrite sched,
                               X86VectorVTInfo _> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
      Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (null_frag),
          (_.VT (MaskOpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
          EVEX_4V, Sched<[sched]>;

  defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)),
          (_.VT (MaskOpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>,
          EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.ScalarMemOp:$src3),
           OpcodeStr, "${src3}"#_.BroadcastStr#", $src2",
           "$src2, ${src3}"#_.BroadcastStr,
           (_.VT (OpNode _.RC:$src2,
                         (_.VT (_.BroadcastLdFrag addr:$src3)),
                         _.RC:$src1)),
           (_.VT (MaskOpNode _.RC:$src2,
                             (_.VT (_.BroadcastLdFrag addr:$src3)),
                             _.RC:$src1)), 1, 0>, EVEX_4V, EVEX_B,
           Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched,
                                 X86VectorVTInfo _> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
      Uses = [MXCSR] in
  defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
           (null_frag),
           (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc))),
           1, 1>, EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
}

multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                                   SDNode MaskOpNode, SDNode OpNodeRnd,
                                   X86SchedWriteWidths sched,
                                   AVX512VLVectorVTInfo _,
                                   Predicate prd = HasAVX512> {
  let Predicates = [prd] in {
    defm Z : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                 sched.ZMM, _.info512>,
             avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
                                   _.info512>,
             EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, prd] in {
    defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.YMM, _.info256>,
                EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.XMM, _.info128>,
                EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}

multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                              SDNode MaskOpNode, SDNode OpNodeRnd> {
  defm PH : avx512_fma3p_231_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
                                    OpNodeRnd, SchedWriteFMA,
                                    avx512vl_f16_info, HasFP16>, T_MAP6PD;
  defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
                                    OpNodeRnd, SchedWriteFMA,
                                    avx512vl_f32_info>, T8PD;
  defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
                                    OpNodeRnd, SchedWriteFMA,
                                    avx512vl_f64_info>, T8PD, VEX_W;
}

defm VFMADD231    : avx512_fma3p_231_f<0xB8, "vfmadd231", any_fma,
                                       fma, X86FmaddRnd>;
defm VFMSUB231    : avx512_fma3p_231_f<0xBA, "vfmsub231", X86any_Fmsub,
                                       X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub,
                                       X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd,
                                       X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD231   : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86any_Fnmadd,
                                       X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB231   : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86any_Fnmsub,
                                       X86Fnmsub, X86FnmsubRnd>;

multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                               SDNode MaskOpNode, X86FoldableSchedWrite sched,
                               X86VectorVTInfo _> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
      Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (null_frag),
          (_.VT (MaskOpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1>,
          EVEX_4V, Sched<[sched]>;

  // The pattern is in 312 order so that the load is in a different place from
  // the 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
  defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)),
          (_.VT (MaskOpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>,
          EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;

  // The pattern is in 312 order so that the load is in a different place from
  // the 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
  defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.ScalarMemOp:$src3),
           OpcodeStr, "${src3}"#_.BroadcastStr#", $src2",
           "$src2, ${src3}"#_.BroadcastStr,
           (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
                         _.RC:$src1, _.RC:$src2)),
           (_.VT (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
                             _.RC:$src1, _.RC:$src2)), 1, 0>,
           EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched,
                                 X86VectorVTInfo _> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
      Uses = [MXCSR] in
  defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
           (null_frag),
           (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 timm:$rc))),
           1, 1>, EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
}

multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                                   SDNode MaskOpNode, SDNode OpNodeRnd,
                                   X86SchedWriteWidths sched,
                                   AVX512VLVectorVTInfo _,
                                   Predicate prd = HasAVX512> {
  let Predicates = [prd] in {
    defm Z : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                 sched.ZMM, _.info512>,
             avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
                                   _.info512>,
             EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, prd] in {
    defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.YMM, _.info256>,
                EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.XMM, _.info128>,
                EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}

multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                              SDNode MaskOpNode, SDNode OpNodeRnd> {
  defm PH : avx512_fma3p_132_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
                                    OpNodeRnd, SchedWriteFMA,
                                    avx512vl_f16_info, HasFP16>, T_MAP6PD;
  defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
                                    OpNodeRnd, SchedWriteFMA,
                                    avx512vl_f32_info>, T8PD;
  defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
                                    OpNodeRnd, SchedWriteFMA,
                                    avx512vl_f64_info>, T8PD, VEX_W;
}

defm VFMADD132    : avx512_fma3p_132_f<0x98, "vfmadd132", any_fma,
                                       fma, X86FmaddRnd>;
defm VFMSUB132    : avx512_fma3p_132_f<0x9A, "vfmsub132", X86any_Fmsub,
                                       X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub,
                                       X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd,
                                       X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD132   : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86any_Fnmadd,
                                       X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB132   : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86any_Fnmsub,
                                       X86Fnmsub, X86FnmsubRnd>;

// Scalar FMA
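// The _Int forms below operate on the whole 128-bit register, so the upper
// elements of $src1 pass through unchanged, matching the intrinsic semantics;
// the isCodeGenOnly forms operate directly on the scalar register class.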
multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                               dag RHS_r, dag RHS_m, dag RHS_b, bit MaskOnlyReg> {
let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
  defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3), OpcodeStr,
          "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
          EVEX_4V, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC;

  let mayLoad = 1 in
  defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
          "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
          EVEX_4V, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC;

  let Uses = [MXCSR] in
  defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (null_frag), 1, 1>,
          EVEX_4V, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>;

  let isCodeGenOnly = 1, isCommutable = 1 in {
    def r : AVX512<opc, MRMSrcReg, (outs _.FRC:$dst),
                   (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
                   !strconcat(OpcodeStr,
                              "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                   !if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>, EVEX_4V, SIMD_EXC;
    def m : AVX512<opc, MRMSrcMem, (outs _.FRC:$dst),
                   (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
                   !strconcat(OpcodeStr,
                              "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                   [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>, EVEX_4V, SIMD_EXC;

    let Uses = [MXCSR] in
    def rb : AVX512<opc, MRMSrcReg, (outs _.FRC:$dst),
                    (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3, AVX512RC:$rc),
                    !strconcat(OpcodeStr,
                               "\t{$rc, $src3, $src2, $dst|$dst, $src2, $src3, $rc}"),
                    !if(MaskOnlyReg, [], [RHS_b])>, EVEX_B, EVEX_RC,
                    Sched<[SchedWriteFMA.Scl]>, EVEX_4V;
  } // isCodeGenOnly = 1
} // Constraints = "$src1 = $dst"
}

multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
                            string OpcodeStr, SDPatternOperator OpNode, SDNode OpNodeRnd,
                            X86VectorVTInfo _, string SUFF> {
  let ExeDomain = _.ExeDomain in {
  defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix, _,
                // Operands for the intrinsic are in 123 order to preserve
                // passthru semantics.
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
                                           _.FRC:$src3))),
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
                                           (_.ScalarLdFrag addr:$src3)))),
                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src1,
                                           _.FRC:$src3, (i32 timm:$rc)))), 0>;

  defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix, _,
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
                                           _.FRC:$src1))),
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
                                           (_.ScalarLdFrag addr:$src3), _.FRC:$src1))),
                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src3,
                                           _.FRC:$src1, (i32 timm:$rc)))), 1>;

  // One pattern is in 312 order so that the load is in a different place from
  // the 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
  defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix, _,
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3,
                                           _.FRC:$src2))),
                (set _.FRC:$dst, (_.EltVT (OpNode (_.ScalarLdFrag addr:$src3),
                                           _.FRC:$src1, _.FRC:$src2))),
                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src1, _.FRC:$src3,
                                           _.FRC:$src2, (i32 timm:$rc)))), 1>;
  }
}

multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
                        string OpcodeStr, SDPatternOperator OpNode, SDNode OpNodeRnd> {
  let Predicates = [HasAVX512] in {
    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
                                 OpNodeRnd, f32x_info, "SS">,
                                 EVEX_CD8<32, CD8VT1>, VEX_LIG, T8PD;
    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
                                 OpNodeRnd, f64x_info, "SD">,
                                 EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W, T8PD;
  }
  let Predicates = [HasFP16] in {
    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
                                 OpNodeRnd, f16x_info, "SH">,
                                 EVEX_CD8<16, CD8VT1>, VEX_LIG, T_MAP6PD;
  }
}

defm VFMADD  : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", any_fma, X86FmaddRnd>;
defm VFMSUB  : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86any_Fmsub, X86FmsubRnd>;
defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86any_Fnmadd, X86FnmaddRnd>;
defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86any_Fnmsub, X86FnmsubRnd>;

multiclass avx512_scalar_fma_patterns<SDPatternOperator Op, SDNode MaskedOp,
                                      SDNode RndOp, string Prefix,
                                      string Suffix, SDNode Move,
                                      X86VectorVTInfo _, PatLeaf ZeroFP,
                                      Predicate prd = HasAVX512> {
  let Predicates = [prd] in {
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                          (Op _.FRC:$src2,
                              (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                              _.FRC:$src3))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zr_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                          (Op _.FRC:$src2, _.FRC:$src3,
                              (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zr_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                          (Op _.FRC:$src2,
                              (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                              (_.ScalarLdFrag addr:$src3)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zm_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                          (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                              (_.ScalarLdFrag addr:$src3), _.FRC:$src2))))),
              (!cast<I>(Prefix#"132"#Suffix#"Zm_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                          (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
                              (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zm_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               addr:$src3)>;

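    // The remaining patterns cover the merge-masked (_Intk) and zero-masked
    // (_Intkz) variants, selected via X86selects_mask with either the original
    // low element or ZeroFP as the passthru value.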
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                          (X86selects_mask VK1WM:$mask,
                           (MaskedOp _.FRC:$src2,
                                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                     _.FRC:$src3),
                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zr_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                          (X86selects_mask VK1WM:$mask,
                           (MaskedOp _.FRC:$src2,
                                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                     (_.ScalarLdFrag addr:$src3)),
                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zm_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                          (X86selects_mask VK1WM:$mask,
                           (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                     (_.ScalarLdFrag addr:$src3), _.FRC:$src2),
                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"132"#Suffix#"Zm_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                          (X86selects_mask VK1WM:$mask,
                           (MaskedOp _.FRC:$src2, _.FRC:$src3,
                                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zr_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                          (X86selects_mask VK1WM:$mask,
                           (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
                                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zm_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                          (X86selects_mask VK1WM:$mask,
                           (MaskedOp _.FRC:$src2,
                                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                     _.FRC:$src3),
                           (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zr_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                          (X86selects_mask VK1WM:$mask,
                           (MaskedOp _.FRC:$src2, _.FRC:$src3,
                                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
                           (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zr_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                          (X86selects_mask VK1WM:$mask,
                           (MaskedOp _.FRC:$src2,
                                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                     (_.ScalarLdFrag addr:$src3)),
                           (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zm_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                          (X86selects_mask VK1WM:$mask,
                           (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                     _.FRC:$src2, (_.ScalarLdFrag addr:$src3)),
                           (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"132"#Suffix#"Zm_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                          (X86selects_mask VK1WM:$mask,
                           (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
                                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
                           (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zm_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    // Patterns with rounding mode.
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                          (RndOp _.FRC:$src2,
                                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                 _.FRC:$src3, (i32 timm:$rc)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                          (RndOp _.FRC:$src2, _.FRC:$src3,
                                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                 (i32 timm:$rc)))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                          (X86selects_mask VK1WM:$mask,
                           (RndOp _.FRC:$src2,
                                  (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                  _.FRC:$src3, (i32 timm:$rc)),
                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                          (X86selects_mask VK1WM:$mask,
                           (RndOp _.FRC:$src2, _.FRC:$src3,
                                  (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                  (i32 timm:$rc)),
                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                          (X86selects_mask VK1WM:$mask,
                           (RndOp _.FRC:$src2,
                                  (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                  _.FRC:$src3, (i32 timm:$rc)),
                           (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                          (X86selects_mask VK1WM:$mask,
                           (RndOp _.FRC:$src2, _.FRC:$src3,
                                  (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                  (i32 timm:$rc)),
                           (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
  }
}

defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD", "SH",
                                  X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB", "SH",
                                  X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SH",
                                  X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SH",
                                  X86Movsh, v8f16x_info, fp16imm0, HasFP16>;

defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD",
                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
                                  "SS", X86Movss, v4f32x_info, fp32imm0>;

defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD",
                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;

//===----------------------------------------------------------------------===//
// AVX-512 Packed Multiply of Unsigned 52-bit Integers and Add the Low 52 Bits (IFMA)
//===----------------------------------------------------------------------===//
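// Each 64-bit lane forms the 104-bit product of the low 52 bits of the two
// multiplicand operands; VPMADD52LUQ adds the low 52 bits of that product to
// the 64-bit accumulator, and VPMADD52HUQ adds the high 52 bits.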
let Constraints = "$src1 = $dst" in {
multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  // NOTE: The SDNode has the multiply operands first with the add last.
  // This enables commuted load patterns to be autogenerated by tablegen.
  let ExeDomain = _.ExeDomain in {
  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
          T8PD, EVEX_4V, Sched<[sched]>;

  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
          T8PD, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.ScalarMemOp:$src3),
           OpcodeStr, !strconcat("${src3}", _.BroadcastStr, ", $src2"),
           !strconcat("$src2, ${src3}", _.BroadcastStr),
           (OpNode _.RC:$src2,
                   (_.VT (_.BroadcastLdFrag addr:$src3)),
                   _.RC:$src1)>,
           T8PD, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
} // Constraints = "$src1 = $dst"

multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
  let Predicates = [HasIFMA] in {
    defm Z : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
             EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, HasIFMA] in {
    defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
                EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
                EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}

defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l,
                                         SchedWriteVecIMul, avx512vl_i64_info>,
                                         VEX_W;
defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h,
                                         SchedWriteVecIMul, avx512vl_i64_info>,
                                         VEX_W;

//===----------------------------------------------------------------------===//
// AVX-512 Scalar convert from sign integer to float/double
//===----------------------------------------------------------------------===//

multiclass avx512_vcvtsi<bits<8> opc, SDPatternOperator OpNode, X86FoldableSchedWrite sched,
                         RegisterClass SrcRC, X86VectorVTInfo DstVT,
                         X86MemOperand x86memop, PatFrag ld_frag, string asm,
                         string mem, list<Register> _Uses = [MXCSR],
                         bit _mayRaiseFPException = 1> {
let ExeDomain = DstVT.ExeDomain, Uses = _Uses,
    mayRaiseFPException = _mayRaiseFPException in {
  let hasSideEffects = 0, isCodeGenOnly = 1 in {
    def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
                (ins DstVT.FRC:$src1, SrcRC:$src),
                !strconcat(asm, "\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
                EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
    let mayLoad = 1 in
    def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
                (ins DstVT.FRC:$src1, x86memop:$src),
                asm#"{"#mem#"}\t{$src, $src1, $dst|$dst, $src1, $src}", []>,
                EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  } // hasSideEffects = 0
  def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
                  (ins DstVT.RC:$src1, SrcRC:$src2),
                  !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set DstVT.RC:$dst,
                        (OpNode (DstVT.VT DstVT.RC:$src1), SrcRC:$src2))]>,
                  EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;

  def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
                  (ins DstVT.RC:$src1, x86memop:$src2),
                  asm#"{"#mem#"}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  [(set DstVT.RC:$dst,
                        (OpNode (DstVT.VT DstVT.RC:$src1),
                                (ld_frag addr:$src2)))]>,
                  EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
  def : InstAlias<"v"#asm#mem#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  (!cast<Instruction>(NAME#"rr_Int") DstVT.RC:$dst,
                   DstVT.RC:$src1, SrcRC:$src2), 0, "att">;
}

multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode,
                               X86FoldableSchedWrite sched, RegisterClass SrcRC,
                               X86VectorVTInfo DstVT, string asm,
                               string mem> {
  let ExeDomain = DstVT.ExeDomain, Uses = [MXCSR] in
  def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
                   (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
                   !strconcat(asm,
                              "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"),
                   [(set DstVT.RC:$dst,
                         (OpNode (DstVT.VT DstVT.RC:$src1),
                                 SrcRC:$src2,
                                 (i32 timm:$rc)))]>,
                   EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
  def : InstAlias<"v"#asm#mem#"\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}",
                  (!cast<Instruction>(NAME#"rrb_Int") DstVT.RC:$dst,
                   DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc), 0, "att">;
}

multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode, SDNode OpNodeRnd,
                                X86FoldableSchedWrite sched,
                                RegisterClass SrcRC, X86VectorVTInfo DstVT,
                                X86MemOperand x86memop, PatFrag ld_frag,
                                string asm, string mem> {
  defm NAME : avx512_vcvtsi_round<opc, OpNodeRnd, sched, SrcRC, DstVT, asm, mem>,
              avx512_vcvtsi<opc, OpNode, sched, SrcRC, DstVT, x86memop,
                            ld_frag, asm, mem>, VEX_LIG;
}

let Predicates = [HasAVX512] in {
defm VCVTSI2SSZ  : avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
                                        WriteCvtI2SS, GR32,
                                        v4f32x_info, i32mem, loadi32, "cvtsi2ss", "l">,
                                        XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
                                        WriteCvtI2SS, GR64,
                                        v4f32x_info, i64mem, loadi64, "cvtsi2ss", "q">,
                                        XS, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTSI2SDZ  : avx512_vcvtsi<0x2A, null_frag, WriteCvtI2SD, GR32,
                                 v2f64x_info, i32mem, loadi32, "cvtsi2sd", "l", [], 0>,
                                 XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
                                        WriteCvtI2SD, GR64,
                                        v2f64x_info, i64mem, loadi64, "cvtsi2sd", "q">,
                                        XD, VEX_W, EVEX_CD8<64, CD8VT1>;

def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
                (VCVTSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
                (VCVTSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;

def : Pat<(f32 (any_sint_to_fp (loadi32 addr:$src))),
          (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f32 (any_sint_to_fp (loadi64 addr:$src))),
          (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (any_sint_to_fp (loadi32 addr:$src))),
          (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (any_sint_to_fp (loadi64 addr:$src))),
          (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;

def : Pat<(f32 (any_sint_to_fp GR32:$src)),
          (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f32 (any_sint_to_fp GR64:$src)),
          (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
def : Pat<(f64 (any_sint_to_fp GR32:$src)),
          (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f64 (any_sint_to_fp GR64:$src)),
          (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;

defm VCVTUSI2SSZ   : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
                                          WriteCvtI2SS, GR32,
                                          v4f32x_info, i32mem, loadi32,
                                          "cvtusi2ss", "l">, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
                                          WriteCvtI2SS, GR64,
                                          v4f32x_info, i64mem, loadi64, "cvtusi2ss", "q">,
                                          XS, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTUSI2SDZ   : avx512_vcvtsi<0x7B, null_frag, WriteCvtI2SD, GR32, v2f64x_info,
                                   i32mem, loadi32, "cvtusi2sd", "l", [], 0>,
                                   XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
                                          WriteCvtI2SD, GR64,
                                          v2f64x_info, i64mem, loadi64, "cvtusi2sd", "q">,
                                          XD, VEX_W, EVEX_CD8<64, CD8VT1>;

def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
                (VCVTUSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
                (VCVTUSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;

def : Pat<(f32 (any_uint_to_fp (loadi32 addr:$src))),
          (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f32 (any_uint_to_fp (loadi64 addr:$src))),
          (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (any_uint_to_fp (loadi32 addr:$src))),
          (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (any_uint_to_fp (loadi64 addr:$src))),
          (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;

def : Pat<(f32 (any_uint_to_fp GR32:$src)),
          (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f32 (any_uint_to_fp GR64:$src)),
          (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
def : Pat<(f64 (any_uint_to_fp GR32:$src)),
          (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f64 (any_uint_to_fp GR64:$src)),
          (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
}

//===----------------------------------------------------------------------===//
// AVX-512 Scalar convert from float/double to integer
//===----------------------------------------------------------------------===//

multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
                                  X86VectorVTInfo DstVT, SDNode OpNode,
                                  SDNode OpNodeRnd,
                                  X86FoldableSchedWrite sched, string asm,
                                  string aliasStr, Predicate prd = HasAVX512> {
  let Predicates = [prd], ExeDomain = SrcVT.ExeDomain in {
    def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
                    !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
                    [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src)))]>,
                    EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
    let Uses = [MXCSR] in
    def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc),
                     !strconcat(asm, "\t{$rc, $src, $dst|$dst, $src, $rc}"),
                     [(set DstVT.RC:$dst, (OpNodeRnd (SrcVT.VT SrcVT.RC:$src), (i32 timm:$rc)))]>,
                     EVEX, VEX_LIG, EVEX_B, EVEX_RC,
                     Sched<[sched]>;
    def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src),
                    !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
                    [(set DstVT.RC:$dst, (OpNode
                          (SrcVT.ScalarIntMemFrags addr:$src)))]>,
                    EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  } // Predicates = [prd]

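  // AT&T-style aliases that accept the mnemonic with the explicit {l}/{q}
  // size suffix for the register, rounding, and memory forms.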
$src}", 7663 (!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0, "att">; 7664 def : InstAlias<"v" # asm # aliasStr # "\t{$rc, $src, $dst|$dst, $src, $rc}", 7665 (!cast<Instruction>(NAME # "rrb_Int") DstVT.RC:$dst, SrcVT.RC:$src, AVX512RC:$rc), 0, "att">; 7666 def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}", 7667 (!cast<Instruction>(NAME # "rm_Int") DstVT.RC:$dst, 7668 SrcVT.IntScalarMemOp:$src), 0, "att">; 7669} 7670 7671// Convert float/double to signed/unsigned int 32/64 7672defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info,X86cvts2si, 7673 X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{l}">, 7674 XS, EVEX_CD8<32, CD8VT1>; 7675defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info, X86cvts2si, 7676 X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{q}">, 7677 XS, VEX_W, EVEX_CD8<32, CD8VT1>; 7678defm VCVTSS2USIZ: avx512_cvt_s_int_round<0x79, f32x_info, i32x_info, X86cvts2usi, 7679 X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{l}">, 7680 XS, EVEX_CD8<32, CD8VT1>; 7681defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, f32x_info, i64x_info, X86cvts2usi, 7682 X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{q}">, 7683 XS, VEX_W, EVEX_CD8<32, CD8VT1>; 7684defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info, X86cvts2si, 7685 X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{l}">, 7686 XD, EVEX_CD8<64, CD8VT1>; 7687defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info, X86cvts2si, 7688 X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{q}">, 7689 XD, VEX_W, EVEX_CD8<64, CD8VT1>; 7690defm VCVTSD2USIZ: avx512_cvt_s_int_round<0x79, f64x_info, i32x_info, X86cvts2usi, 7691 X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{l}">, 7692 XD, EVEX_CD8<64, CD8VT1>; 7693defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info, X86cvts2usi, 7694 X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{q}">, 7695 XD, VEX_W, EVEX_CD8<64, CD8VT1>; 7696 7697multiclass avx512_cvt_s<bits<8> opc, string asm, X86VectorVTInfo SrcVT, 7698 X86VectorVTInfo DstVT, SDNode OpNode, 7699 X86FoldableSchedWrite sched> { 7700 let Predicates = [HasAVX512], ExeDomain = SrcVT.ExeDomain in { 7701 let isCodeGenOnly = 1 in { 7702 def rr : AVX512<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.FRC:$src), 7703 !strconcat(asm,"\t{$src, $dst|$dst, $src}"), 7704 [(set DstVT.RC:$dst, (OpNode SrcVT.FRC:$src))]>, 7705 EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC; 7706 def rm : AVX512<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.ScalarMemOp:$src), 7707 !strconcat(asm,"\t{$src, $dst|$dst, $src}"), 7708 [(set DstVT.RC:$dst, (OpNode (SrcVT.ScalarLdFrag addr:$src)))]>, 7709 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; 7710 } 7711 } // Predicates = [HasAVX512] 7712} 7713 7714defm VCVTSS2SIZ: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i32x_info, 7715 lrint, WriteCvtSS2I>, XS, EVEX_CD8<32, CD8VT1>; 7716defm VCVTSS2SI64Z: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i64x_info, 7717 llrint, WriteCvtSS2I>, VEX_W, XS, EVEX_CD8<32, CD8VT1>; 7718defm VCVTSD2SIZ: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i32x_info, 7719 lrint, WriteCvtSD2I>, XD, EVEX_CD8<64, CD8VT1>; 7720defm VCVTSD2SI64Z: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i64x_info, 7721 llrint, WriteCvtSD2I>, VEX_W, XD, EVEX_CD8<64, CD8VT1>; 7722 7723let Predicates = [HasAVX512] in { 7724 def : Pat<(i64 (lrint FR32:$src)), (VCVTSS2SI64Zrr FR32:$src)>; 7725 def : Pat<(i64 (lrint (loadf32 addr:$src))), (VCVTSS2SI64Zrm addr:$src)>; 7726 7727 def : Pat<(i64 (lrint FR64:$src)), (VCVTSD2SI64Zrr FR64:$src)>; 7728 def : 
let Predicates = [HasAVX512] in {
  def : Pat<(i64 (lrint FR32:$src)), (VCVTSS2SI64Zrr FR32:$src)>;
  def : Pat<(i64 (lrint (loadf32 addr:$src))), (VCVTSS2SI64Zrm addr:$src)>;

  def : Pat<(i64 (lrint FR64:$src)), (VCVTSD2SI64Zrr FR64:$src)>;
  def : Pat<(i64 (lrint (loadf64 addr:$src))), (VCVTSD2SI64Zrm addr:$src)>;
}

// Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang,
// which produce unnecessary vmovs{s,d} instructions.
let Predicates = [HasAVX512] in {
def : Pat<(v4f32 (X86Movss
                  (v4f32 VR128X:$dst),
                  (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR64:$src)))))),
          (VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v4f32 (X86Movss
                  (v4f32 VR128X:$dst),
                  (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi64 addr:$src))))))),
          (VCVTSI642SSZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v4f32 (X86Movss
                  (v4f32 VR128X:$dst),
                  (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR32:$src)))))),
          (VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v4f32 (X86Movss
                  (v4f32 VR128X:$dst),
                  (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi32 addr:$src))))))),
          (VCVTSI2SSZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                  (v2f64 VR128X:$dst),
                  (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR64:$src)))))),
          (VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v2f64 (X86Movsd
                  (v2f64 VR128X:$dst),
                  (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi64 addr:$src))))))),
          (VCVTSI642SDZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                  (v2f64 VR128X:$dst),
                  (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR32:$src)))))),
          (VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v2f64 (X86Movsd
                  (v2f64 VR128X:$dst),
                  (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi32 addr:$src))))))),
          (VCVTSI2SDZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v4f32 (X86Movss
                  (v4f32 VR128X:$dst),
                  (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR64:$src)))))),
          (VCVTUSI642SSZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v4f32 (X86Movss
                  (v4f32 VR128X:$dst),
                  (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi64 addr:$src))))))),
          (VCVTUSI642SSZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v4f32 (X86Movss
                  (v4f32 VR128X:$dst),
                  (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR32:$src)))))),
          (VCVTUSI2SSZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v4f32 (X86Movss
                  (v4f32 VR128X:$dst),
                  (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi32 addr:$src))))))),
          (VCVTUSI2SSZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                  (v2f64 VR128X:$dst),
                  (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR64:$src)))))),
          (VCVTUSI642SDZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v2f64 (X86Movsd
                  (v2f64 VR128X:$dst),
                  (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi64 addr:$src))))))),
          (VCVTUSI642SDZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                  (v2f64 VR128X:$dst),
                  (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR32:$src)))))),
          (VCVTUSI2SDZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v2f64 (X86Movsd
                  (v2f64 VR128X:$dst),
                  (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi32 addr:$src))))))),
          (VCVTUSI2SDZrm_Int VR128X:$dst, addr:$src)>;
} // Predicates = [HasAVX512]

// Convert float/double to signed/unsigned int 32/64 with truncation
multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
                            X86VectorVTInfo _DstRC, SDPatternOperator OpNode,
                            SDNode OpNodeInt, SDNode OpNodeSAE,
                            X86FoldableSchedWrite sched, string aliasStr,
                            Predicate prd = HasAVX512> {
let Predicates = [prd], ExeDomain = _SrcRC.ExeDomain in {
  let isCodeGenOnly = 1 in {
  def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
                  !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
                  [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>,
                  EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
  def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
                  !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
                  [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>,
                  EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  }

  def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
                      !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
                      [(set _DstRC.RC:$dst, (OpNodeInt (_SrcRC.VT _SrcRC.RC:$src)))]>,
                      EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
  let Uses = [MXCSR] in
  def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
                       !strconcat(asm, "\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
                       [(set _DstRC.RC:$dst, (OpNodeSAE (_SrcRC.VT _SrcRC.RC:$src)))]>,
                       EVEX, VEX_LIG, EVEX_B, Sched<[sched]>;
  def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
                      (ins _SrcRC.IntScalarMemOp:$src),
                      !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
                      [(set _DstRC.RC:$dst,
                            (OpNodeInt (_SrcRC.ScalarIntMemFrags addr:$src)))]>,
                      EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
} // Predicates = [prd]

  def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
  def : InstAlias<asm # aliasStr # "\t{{sae}, $src, $dst|$dst, $src, {sae}}",
                  (!cast<Instruction>(NAME # "rrb_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
  def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "rm_Int") _DstRC.RC:$dst,
                   _SrcRC.IntScalarMemOp:$src), 0, "att">;
}

defm VCVTTSS2SIZ:    avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
                                      any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
                                      "{l}">, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2SI64Z:  avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
                                      any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
                                      "{q}">, VEX_W, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSD2SIZ:    avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
                                      any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
                                      "{l}">, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2SI64Z:  avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
                                      any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
                                      "{q}">, VEX_W, XD, EVEX_CD8<64, CD8VT1>;

defm VCVTTSS2USIZ:   avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i32x_info,
                                      any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
                                      "{l}">, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2USI64Z: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i64x_info,
                                      any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
                                      "{q}">, XS, VEX_W, EVEX_CD8<32, CD8VT1>;
//===----------------------------------------------------------------------===//
// AVX-512 Convert from float to double and back
//===----------------------------------------------------------------------===//

let Uses = [MXCSR], mayRaiseFPException = 1 in
multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                X86VectorVTInfo _Src, SDNode OpNode,
                                X86FoldableSchedWrite sched> {
  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (_.VT (OpNode (_.VT _.RC:$src1),
                                       (_Src.VT _Src.RC:$src2)))>,
                         EVEX_4V, VEX_LIG, Sched<[sched]>;
  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (_.VT (OpNode (_.VT _.RC:$src1),
                                       (_Src.ScalarIntMemFrags addr:$src2)))>,
                         EVEX_4V, VEX_LIG,
                         Sched<[sched.Folded, sched.ReadAfterFold]>;

  let isCodeGenOnly = 1, hasSideEffects = 0 in {
    def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
               (ins _.FRC:$src1, _Src.FRC:$src2),
               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
               EVEX_4V, VEX_LIG, Sched<[sched]>;
    let mayLoad = 1 in
    def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
               (ins _.FRC:$src1, _Src.ScalarMemOp:$src2),
               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
               EVEX_4V, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Scalar Conversion with SAE - suppress all exceptions
multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                    X86VectorVTInfo _Src, SDNode OpNodeSAE,
                                    X86FoldableSchedWrite sched> {
  let Uses = [MXCSR] in
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
                        "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                        (_.VT (OpNodeSAE (_.VT _.RC:$src1),
                                         (_Src.VT _Src.RC:$src2)))>,
                        EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
}

// Scalar Conversion with rounding control (RC)
multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                   X86VectorVTInfo _Src, SDNode OpNodeRnd,
                                   X86FoldableSchedWrite sched> {
  let Uses = [MXCSR] in
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
                        "$rc, $src2, $src1", "$src1, $src2, $rc",
                        (_.VT (OpNodeRnd (_.VT _.RC:$src1),
                                         (_Src.VT _Src.RC:$src2), (i32 timm:$rc)))>,
                        EVEX_4V, VEX_LIG, Sched<[sched]>,
                        EVEX_B, EVEX_RC;
}
multiclass avx512_cvt_fp_scalar_trunc<bits<8> opc, string OpcodeStr,
                                      SDNode OpNode, SDNode OpNodeRnd,
                                      X86FoldableSchedWrite sched,
                                      X86VectorVTInfo _src, X86VectorVTInfo _dst,
                                      Predicate prd = HasAVX512> {
  let Predicates = [prd], ExeDomain = SSEPackedSingle in {
    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
             avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
                                     OpNodeRnd, sched>, EVEX_CD8<_src.EltSize, CD8VT1>;
  }
}
multiclass avx512_cvt_fp_scalar_extend<bits<8> opc, string OpcodeStr,
                                       SDNode OpNode, SDNode OpNodeSAE,
                                       X86FoldableSchedWrite sched,
                                       X86VectorVTInfo _src, X86VectorVTInfo _dst,
                                       Predicate prd = HasAVX512> {
  let Predicates = [prd], ExeDomain = SSEPackedSingle in {
    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
             avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeSAE, sched>,
             EVEX_CD8<_src.EltSize, CD8VT1>;
  }
}
defm VCVTSD2SS : avx512_cvt_fp_scalar_trunc<0x5A, "vcvtsd2ss", X86frounds,
                                            X86froundsRnd, WriteCvtSD2SS, f64x_info,
                                            f32x_info>, XD, VEX_W;
defm VCVTSS2SD : avx512_cvt_fp_scalar_extend<0x5A, "vcvtss2sd", X86fpexts,
                                             X86fpextsSAE, WriteCvtSS2SD, f32x_info,
                                             f64x_info>, XS;
defm VCVTSD2SH : avx512_cvt_fp_scalar_trunc<0x5A, "vcvtsd2sh", X86frounds,
                                            X86froundsRnd, WriteCvtSD2SS, f64x_info,
                                            f16x_info, HasFP16>, T_MAP5XD, VEX_W;
defm VCVTSH2SD : avx512_cvt_fp_scalar_extend<0x5A, "vcvtsh2sd", X86fpexts,
                                             X86fpextsSAE, WriteCvtSS2SD, f16x_info,
                                             f64x_info, HasFP16>, T_MAP5XS;
defm VCVTSS2SH : avx512_cvt_fp_scalar_trunc<0x1D, "vcvtss2sh", X86frounds,
                                            X86froundsRnd, WriteCvtSD2SS, f32x_info,
                                            f16x_info, HasFP16>, T_MAP5PS;
defm VCVTSH2SS : avx512_cvt_fp_scalar_extend<0x13, "vcvtsh2ss", X86fpexts,
                                             X86fpextsSAE, WriteCvtSS2SD, f16x_info,
                                             f32x_info, HasFP16>, T_MAP6PS;

def : Pat<(f64 (any_fpextend FR32X:$src)),
          (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>,
          Requires<[HasAVX512]>;
def : Pat<(f64 (any_fpextend (loadf32 addr:$src))),
          (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
          Requires<[HasAVX512, OptForSize]>;

def : Pat<(f32 (any_fpround FR64X:$src)),
          (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
          Requires<[HasAVX512]>;

def : Pat<(f32 (any_fpextend FR16X:$src)),
          (VCVTSH2SSZrr (f32 (IMPLICIT_DEF)), FR16X:$src)>,
          Requires<[HasFP16]>;
def : Pat<(f32 (any_fpextend (loadf16 addr:$src))),
          (VCVTSH2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>,
          Requires<[HasFP16, OptForSize]>;

def : Pat<(f64 (any_fpextend FR16X:$src)),
          (VCVTSH2SDZrr (f64 (IMPLICIT_DEF)), FR16X:$src)>,
          Requires<[HasFP16]>;
def : Pat<(f64 (any_fpextend (loadf16 addr:$src))),
          (VCVTSH2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
          Requires<[HasFP16, OptForSize]>;

def : Pat<(f16 (any_fpround FR32X:$src)),
          (VCVTSS2SHZrr (f16 (IMPLICIT_DEF)), FR32X:$src)>,
          Requires<[HasFP16]>;
def : Pat<(f16 (any_fpround FR64X:$src)),
          (VCVTSD2SHZrr (f16 (IMPLICIT_DEF)), FR64X:$src)>,
          Requires<[HasFP16]>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector
                            (f32 (any_fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
          (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
          Requires<[HasAVX512]>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector
                            (f64 (any_fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
          (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
          Requires<[HasAVX512]>;
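// Illustrative C++ mapping for the two _Int patterns just above (a sketch;
// _mm_cvtsd_ss is the ordinary SSE2 intrinsic, not something defined here):
//
//   #include <immintrin.h>
//   __m128 narrow(__m128 dst, __m128d src) {
//     return _mm_cvtsd_ss(dst, src);  // round src[0] to f32, merge into dst[0]
//   }
//
// Matching the fpround + scalar_to_vector + X86Movss tree as one unit lets
// this select a single VCVTSD2SSZrr_Int instead of a convert plus a VMOVSS.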
//===----------------------------------------------------------------------===//
// AVX-512 Vector convert from signed/unsigned integer to float/double
// and from float/double to signed/unsigned integer
//===----------------------------------------------------------------------===//

multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                          X86VectorVTInfo _Src, SDPatternOperator OpNode, SDPatternOperator MaskOpNode,
                          X86FoldableSchedWrite sched,
                          string Broadcast = _.BroadcastStr,
                          string Alias = "", X86MemOperand MemOp = _Src.MemOp,
                          RegisterClass MaskRC = _.KRCWM,
                          dag LdDAG = (_.VT (OpNode (_Src.VT (_Src.LdFrag addr:$src)))),
                          dag MaskLdDAG = (_.VT (MaskOpNode (_Src.VT (_Src.LdFrag addr:$src))))> {
let Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm rr : AVX512_maskable_cvt<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _Src.RC:$src),
                         (ins _.RC:$src0, MaskRC:$mask, _Src.RC:$src),
                         (ins MaskRC:$mask, _Src.RC:$src),
                         OpcodeStr, "$src", "$src",
                         (_.VT (OpNode (_Src.VT _Src.RC:$src))),
                         (vselect_mask MaskRC:$mask,
                                       (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))),
                                       _.RC:$src0),
                         (vselect_mask MaskRC:$mask,
                                       (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))),
                                       _.ImmAllZerosV)>,
                         EVEX, Sched<[sched]>;

  defm rm : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins MemOp:$src),
                         (ins _.RC:$src0, MaskRC:$mask, MemOp:$src),
                         (ins MaskRC:$mask, MemOp:$src),
                         OpcodeStr#Alias, "$src", "$src",
                         LdDAG,
                         (vselect_mask MaskRC:$mask, MaskLdDAG, _.RC:$src0),
                         (vselect_mask MaskRC:$mask, MaskLdDAG, _.ImmAllZerosV)>,
                         EVEX, Sched<[sched.Folded]>;

  defm rmb : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _Src.ScalarMemOp:$src),
                         (ins _.RC:$src0, MaskRC:$mask, _Src.ScalarMemOp:$src),
                         (ins MaskRC:$mask, _Src.ScalarMemOp:$src),
                         OpcodeStr,
                         "${src}"#Broadcast, "${src}"#Broadcast,
                         (_.VT (OpNode (_Src.VT
                                         (_Src.BroadcastLdFrag addr:$src)))),
                         (vselect_mask MaskRC:$mask,
                                       (_.VT
                                        (MaskOpNode
                                         (_Src.VT
                                          (_Src.BroadcastLdFrag addr:$src)))),
                                       _.RC:$src0),
                         (vselect_mask MaskRC:$mask,
                                       (_.VT
                                        (MaskOpNode
                                         (_Src.VT
                                          (_Src.BroadcastLdFrag addr:$src)))),
                                       _.ImmAllZerosV)>,
                         EVEX, EVEX_B, Sched<[sched.Folded]>;
  }
}
// Conversion with SAE - suppress all exceptions
multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                              X86VectorVTInfo _Src, SDNode OpNodeSAE,
                              X86FoldableSchedWrite sched> {
  let Uses = [MXCSR] in
  defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                             (ins _Src.RC:$src), OpcodeStr,
                             "{sae}, $src", "$src, {sae}",
                             (_.VT (OpNodeSAE (_Src.VT _Src.RC:$src)))>,
                             EVEX, EVEX_B, Sched<[sched]>;
}

// Conversion with rounding control (RC)
multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                             X86VectorVTInfo _Src, SDPatternOperator OpNodeRnd,
                             X86FoldableSchedWrite sched> {
  let Uses = [MXCSR] in
  defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr,
                        "$rc, $src", "$src, $rc",
                        (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 timm:$rc)))>,
                        EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
}
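// The rr/rm/rmb forms above, each with merge- and zero-masking, line up with
// the usual intrinsic surface. A rough C++ sketch (assumed mapping, for
// illustration only; these are real AVX-512F intrinsics):
//
//   #include <immintrin.h>
//   __m512 f(__m512 src0, __mmask16 k, __m512i a, const __m512i *p) {
//     __m512 r0 = _mm512_mask_cvtepi32_ps(src0, k, a);   // rrk: merge-masked
//     __m512 r1 = _mm512_maskz_cvtepi32_ps(k, *p);       // rm(kz) once folded
//     return _mm512_add_ps(r0, r1);
//   }
//
// The rmb (embedded-broadcast) form is what "vcvtdq2ps (%rax){1to16}, %zmm0"
// encodes: one scalar element loaded and splatted before conversion.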
// Similar to avx512_vcvt_fp, but uses an extload for the memory form.
multiclass avx512_vcvt_fpextend<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                X86VectorVTInfo _Src, SDPatternOperator OpNode,
                                SDNode MaskOpNode,
                                X86FoldableSchedWrite sched,
                                string Broadcast = _.BroadcastStr,
                                string Alias = "", X86MemOperand MemOp = _Src.MemOp,
                                RegisterClass MaskRC = _.KRCWM>
  : avx512_vcvt_fp<opc, OpcodeStr, _, _Src, OpNode, MaskOpNode, sched, Broadcast,
                   Alias, MemOp, MaskRC,
                   (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src)),
                   (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src))>;

// Extend [Float to Double, Half to Float]
multiclass avx512_cvt_extend<bits<8> opc, string OpcodeStr,
                             AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
                             X86SchedWriteWidths sched, Predicate prd = HasAVX512> {
  let Predicates = [prd] in {
    defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info512, _src.info256,
                                  any_fpextend, fpextend, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, _dst.info512, _src.info256,
                                X86vfpextSAE, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info128, _src.info128,
                                     X86any_vfpext, X86vfpext, sched.XMM,
                                     _dst.info128.BroadcastStr,
                                     "", f64mem>, EVEX_V128;
    defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info256, _src.info128,
                                     any_fpextend, fpextend, sched.YMM>, EVEX_V256;
  }
}

// Truncate [Double to Float, Float to Half]
multiclass avx512_cvt_trunc<bits<8> opc, string OpcodeStr,
                            AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
                            X86SchedWriteWidths sched, Predicate prd = HasAVX512,
                            PatFrag bcast128 = _src.info128.BroadcastLdFrag,
                            PatFrag loadVT128 = _src.info128.LdFrag,
                            RegisterClass maskRC128 = _src.info128.KRCWM> {
  let Predicates = [prd] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, _dst.info256, _src.info512,
                            X86any_vfpround, X86vfpround, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, _dst.info256, _src.info512,
                               X86vfproundRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info128,
                               null_frag, null_frag, sched.XMM,
                               _src.info128.BroadcastStr, "{x}",
                               f128mem, maskRC128>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info256,
                               X86any_vfpround, X86vfpround,
                               sched.YMM, _src.info256.BroadcastStr, "{y}">, EVEX_V256;

    // Special patterns to allow use of X86vmfpround for masking. Instruction
    // patterns have been disabled with null_frag.
    def : Pat<(_dst.info128.VT (X86any_vfpround (_src.info128.VT VR128X:$src))),
              (!cast<Instruction>(NAME # "Z128rr") VR128X:$src)>;
    def : Pat<(X86vmfpround (_src.info128.VT VR128X:$src), (_dst.info128.VT VR128X:$src0),
                            maskRC128:$mask),
              (!cast<Instruction>(NAME # "Z128rrk") VR128X:$src0, maskRC128:$mask, VR128X:$src)>;
    def : Pat<(X86vmfpround (_src.info128.VT VR128X:$src), _dst.info128.ImmAllZerosV,
                            maskRC128:$mask),
              (!cast<Instruction>(NAME # "Z128rrkz") maskRC128:$mask, VR128X:$src)>;

    def : Pat<(_dst.info128.VT (X86any_vfpround (loadVT128 addr:$src))),
              (!cast<Instruction>(NAME # "Z128rm") addr:$src)>;
    def : Pat<(X86vmfpround (loadVT128 addr:$src), (_dst.info128.VT VR128X:$src0),
                            maskRC128:$mask),
              (!cast<Instruction>(NAME # "Z128rmk") VR128X:$src0, maskRC128:$mask, addr:$src)>;
    def : Pat<(X86vmfpround (loadVT128 addr:$src), _dst.info128.ImmAllZerosV,
                            maskRC128:$mask),
              (!cast<Instruction>(NAME # "Z128rmkz") maskRC128:$mask, addr:$src)>;

    def : Pat<(_dst.info128.VT (X86any_vfpround (_src.info128.VT (bcast128 addr:$src)))),
              (!cast<Instruction>(NAME # "Z128rmb") addr:$src)>;
    def : Pat<(X86vmfpround (_src.info128.VT (bcast128 addr:$src)),
                            (_dst.info128.VT VR128X:$src0), maskRC128:$mask),
              (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$src0, maskRC128:$mask, addr:$src)>;
    def : Pat<(X86vmfpround (_src.info128.VT (bcast128 addr:$src)),
                            _dst.info128.ImmAllZerosV, maskRC128:$mask),
              (!cast<Instruction>(NAME # "Z128rmbkz") maskRC128:$mask, addr:$src)>;
  }

  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;

  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
}
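// Note on the "{x}"/"{y}" aliases just defined: in AT&T syntax the memory
// forms of e.g. vcvtpd2ps are ambiguous (both the 128-bit and the 256-bit
// source produce an XMM result), so the assembler needs an explicit suffix.
// Illustrative assembly, matching the InstAlias strings above:
//
//   vcvtpd2psx (%rax), %xmm0    # 128-bit memory source
//   vcvtpd2psy (%rax), %xmm0    # 256-bit memory source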
defm VCVTPD2PS : avx512_cvt_trunc<0x5A, "vcvtpd2ps",
                                  avx512vl_f32_info, avx512vl_f64_info, SchedWriteCvtPD2PS>,
                                  VEX_W, PD, EVEX_CD8<64, CD8VF>;
defm VCVTPS2PD : avx512_cvt_extend<0x5A, "vcvtps2pd",
                                   avx512vl_f64_info, avx512vl_f32_info, SchedWriteCvtPS2PD>,
                                   PS, EVEX_CD8<32, CD8VH>;

// Extend Half to Double
multiclass avx512_cvtph2pd<bits<8> opc, string OpcodeStr,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasFP16] in {
    defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, v8f64_info, v8f16x_info,
                                  any_fpextend, fpextend, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f16x_info,
                                X86vfpextSAE, sched.ZMM>, EVEX_V512;
    def : Pat<(v8f64 (extloadv8f16 addr:$src)),
              (!cast<Instruction>(NAME # "Zrm") addr:$src)>;
  }
  let Predicates = [HasFP16, HasVLX] in {
    defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, v2f64x_info, v8f16x_info,
                                     X86any_vfpext, X86vfpext, sched.XMM, "{1to2}", "",
                                     f32mem>, EVEX_V128;
    defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v8f16x_info,
                                     X86any_vfpext, X86vfpext, sched.YMM, "{1to4}", "",
                                     f64mem>, EVEX_V256;
  }
}
"$dst {${mask}}, $src}", 8316 (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst, 8317 VK4WM:$mask, VR256X:$src), 0, "att">; 8318 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|" 8319 "$dst {${mask}} {z}, $src}", 8320 (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst, 8321 VK4WM:$mask, VR256X:$src), 0, "att">; 8322 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}", 8323 (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, 8324 i64mem:$src), 0, "att">; 8325 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|" 8326 "$dst {${mask}}, ${src}{1to4}}", 8327 (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst, 8328 VK4WM:$mask, i64mem:$src), 0, "att">; 8329 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|" 8330 "$dst {${mask}} {z}, ${src}{1to4}}", 8331 (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst, 8332 VK4WM:$mask, i64mem:$src), 0, "att">; 8333 8334 def : InstAlias<OpcodeStr#"z\t{$src, $dst|$dst, $src}", 8335 (!cast<Instruction>(NAME # "Zrr") VR128X:$dst, 8336 VR512:$src), 0, "att">; 8337 def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}}|" 8338 "$dst {${mask}}, $src}", 8339 (!cast<Instruction>(NAME # "Zrrk") VR128X:$dst, 8340 VK8WM:$mask, VR512:$src), 0, "att">; 8341 def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}} {z}|" 8342 "$dst {${mask}} {z}, $src}", 8343 (!cast<Instruction>(NAME # "Zrrkz") VR128X:$dst, 8344 VK8WM:$mask, VR512:$src), 0, "att">; 8345 def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst|$dst, ${src}{1to8}}", 8346 (!cast<Instruction>(NAME # "Zrmb") VR128X:$dst, 8347 i64mem:$src), 0, "att">; 8348 def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}}|" 8349 "$dst {${mask}}, ${src}{1to8}}", 8350 (!cast<Instruction>(NAME # "Zrmbk") VR128X:$dst, 8351 VK8WM:$mask, i64mem:$src), 0, "att">; 8352 def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}} {z}|" 8353 "$dst {${mask}} {z}, ${src}{1to8}}", 8354 (!cast<Instruction>(NAME # "Zrmbkz") VR128X:$dst, 8355 VK8WM:$mask, i64mem:$src), 0, "att">; 8356} 8357 8358defm VCVTPS2PHX : avx512_cvt_trunc<0x1D, "vcvtps2phx", avx512vl_f16_info, 8359 avx512vl_f32_info, SchedWriteCvtPD2PS, 8360 HasFP16>, T_MAP5PD, EVEX_CD8<32, CD8VF>; 8361defm VCVTPH2PSX : avx512_cvt_extend<0x13, "vcvtph2psx", avx512vl_f32_info, 8362 avx512vl_f16_info, SchedWriteCvtPS2PD, 8363 HasFP16>, T_MAP6PD, EVEX_CD8<16, CD8VH>; 8364defm VCVTPD2PH : avx512_cvtpd2ph<0x5A, "vcvtpd2ph", SchedWriteCvtPD2PS>, 8365 VEX_W, T_MAP5PD, EVEX_CD8<64, CD8VF>; 8366defm VCVTPH2PD : avx512_cvtph2pd<0x5A, "vcvtph2pd", SchedWriteCvtPS2PD>, 8367 T_MAP5PS, EVEX_CD8<16, CD8VQ>; 8368 8369let Predicates = [HasFP16, HasVLX] in { 8370 // Special patterns to allow use of X86vmfpround for masking. Instruction 8371 // patterns have been disabled with null_frag. 
let Predicates = [HasFP16, HasVLX] in {
  // Special patterns to allow use of X86vmfpround for masking. Instruction
  // patterns have been disabled with null_frag.
  def : Pat<(v8f16 (X86any_vfpround (v4f64 VR256X:$src))),
            (VCVTPD2PHZ256rr VR256X:$src)>;
  def : Pat<(v8f16 (X86vmfpround (v4f64 VR256X:$src), (v8f16 VR128X:$src0),
                                 VK4WM:$mask)),
            (VCVTPD2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
  def : Pat<(X86vmfpround (v4f64 VR256X:$src), v8f16x_info.ImmAllZerosV,
                          VK4WM:$mask),
            (VCVTPD2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;

  def : Pat<(v8f16 (X86any_vfpround (loadv4f64 addr:$src))),
            (VCVTPD2PHZ256rm addr:$src)>;
  def : Pat<(X86vmfpround (loadv4f64 addr:$src), (v8f16 VR128X:$src0),
                          VK4WM:$mask),
            (VCVTPD2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86vmfpround (loadv4f64 addr:$src), v8f16x_info.ImmAllZerosV,
                          VK4WM:$mask),
            (VCVTPD2PHZ256rmkz VK4WM:$mask, addr:$src)>;

  def : Pat<(v8f16 (X86any_vfpround (v4f64 (X86VBroadcastld64 addr:$src)))),
            (VCVTPD2PHZ256rmb addr:$src)>;
  def : Pat<(X86vmfpround (v4f64 (X86VBroadcastld64 addr:$src)),
                          (v8f16 VR128X:$src0), VK4WM:$mask),
            (VCVTPD2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86vmfpround (v4f64 (X86VBroadcastld64 addr:$src)),
                          v8f16x_info.ImmAllZerosV, VK4WM:$mask),
            (VCVTPD2PHZ256rmbkz VK4WM:$mask, addr:$src)>;

  def : Pat<(v8f16 (X86any_vfpround (v2f64 VR128X:$src))),
            (VCVTPD2PHZ128rr VR128X:$src)>;
  def : Pat<(X86vmfpround (v2f64 VR128X:$src), (v8f16 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTPD2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86vmfpround (v2f64 VR128X:$src), v8f16x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTPD2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;

  def : Pat<(v8f16 (X86any_vfpround (loadv2f64 addr:$src))),
            (VCVTPD2PHZ128rm addr:$src)>;
  def : Pat<(X86vmfpround (loadv2f64 addr:$src), (v8f16 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTPD2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86vmfpround (loadv2f64 addr:$src), v8f16x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTPD2PHZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v8f16 (X86any_vfpround (v2f64 (X86VBroadcastld64 addr:$src)))),
            (VCVTPD2PHZ128rmb addr:$src)>;
  def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
                          (v8f16 VR128X:$src0), VK2WM:$mask),
            (VCVTPD2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
                          v8f16x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTPD2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
}
// Convert Signed/Unsigned Doubleword to Double
let Uses = []<Register>, mayRaiseFPException = 0 in
multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                           SDNode MaskOpNode, SDPatternOperator OpNode128,
                           SDNode MaskOpNode128,
                           X86SchedWriteWidths sched> {
  // No rounding in this op
  let Predicates = [HasAVX512] in
  defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode,
                          MaskOpNode, sched.ZMM>, EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
                               OpNode128, MaskOpNode128, sched.XMM, "{1to2}",
                               "", i64mem, VK2WM,
                               (v2f64 (OpNode128 (bc_v4i32
                                (v2i64
                                 (scalar_to_vector (loadi64 addr:$src)))))),
                               (v2f64 (MaskOpNode128 (bc_v4i32
                                (v2i64
                                 (scalar_to_vector (loadi64 addr:$src))))))>,
                               EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }
}

// Convert Signed/Unsigned Doubleword to Float
multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                           SDNode MaskOpNode, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode,
                          MaskOpNode, sched.ZMM>,
           avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info,
                             OpNodeRnd, sched.ZMM>, EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode,
                               MaskOpNode, sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }
}

// Convert Float to Signed/Unsigned Doubleword with truncation
multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                            SDNode MaskOpNode,
                            SDNode OpNodeSAE, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
                                OpNodeSAE, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
                               MaskOpNode, sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }
}

// Convert Float to Signed/Unsigned Doubleword
multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode MaskOpNode, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
                               MaskOpNode, sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }
}
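// C++-level sketch of the difference between the two ps->dq families above
// (assumed mapping; both intrinsics are real AVX-512F ones):
//
//   __m512i t(__m512 a) { return _mm512_cvttps_epi32(a); } // truncate: VCVTTPS2DQ
//   __m512i r(__m512 a) { return _mm512_cvtps_epi32(a);  } // MXCSR rounding: VCVTPS2DQ
//
// A plain C cast like (int)x truncates, so vectorized int casts take the
// vcvttps2dq path, while _mm512_cvtps_epi32 honors the current rounding mode.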
// Convert Double to Signed/Unsigned Doubleword with truncation
multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                            SDNode MaskOpNode, SDNode OpNodeSAE,
                            X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
                                OpNodeSAE, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    // we need "x"/"y" suffixes in order to distinguish between 128 and 256
    // memory forms of these instructions in Asm Parser. They have the same
    // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
    // due to the same reason.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
                               null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
                               VK2WM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
                               MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256;
  }

  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
                  VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
                  f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;

  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
                  VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
                  f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
}
// Convert Double to Signed/Unsigned Doubleword
multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode MaskOpNode, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    // we need "x"/"y" suffixes in order to distinguish between 128 and 256
    // memory forms of these instructions in Asm Parser. They have the same
    // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
    // due to the same reason.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
                               null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
                               VK2WM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
                               MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256;
  }

  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
                  f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;

  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
                  f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
}

// Convert Double to Signed/Unsigned Quadword
multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode MaskOpNode, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
                               MaskOpNode, sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }
}
// Convert Double to Signed/Unsigned Quadword with truncation
multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                            SDNode MaskOpNode, SDNode OpNodeRnd,
                            X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
                                OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
                               MaskOpNode, sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }
}

// Convert Signed/Unsigned Quadword to Double
multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                           SDNode MaskOpNode, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode,
                               MaskOpNode, sched.XMM>, EVEX_V128, NotEVEX2VEXConvertible;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256, NotEVEX2VEXConvertible;
  }
}

// Convert Float to Signed/Unsigned Quadword
multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode MaskOpNode, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    // Explicitly specified broadcast string, since we take only 2 elements
    // from v4f32x_info source
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
                               MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM,
                               (v2i64 (OpNode (bc_v4f32
                                (v2f64
                                 (scalar_to_vector (loadf64 addr:$src)))))),
                               (v2i64 (MaskOpNode (bc_v4f32
                                (v2f64
                                 (scalar_to_vector (loadf64 addr:$src))))))>,
                               EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }
}

// Convert Float to Signed/Unsigned Quadword with truncation
multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                            SDNode MaskOpNode, SDNode OpNodeRnd,
                            X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
                                OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    // Explicitly specified broadcast string, since we take only 2 elements
    // from v4f32x_info source
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
                               MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM,
                               (v2i64 (OpNode (bc_v4f32
                                (v2f64
                                 (scalar_to_vector (loadf64 addr:$src)))))),
                               (v2i64 (MaskOpNode (bc_v4f32
                                (v2f64
                                 (scalar_to_vector (loadf64 addr:$src))))))>,
                               EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }
}
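// The q-element conversions above require AVX512DQ (plus VLX for the
// narrower widths). A usage sketch (assumed intrinsic names; compile with
// -mavx512dq -mavx512vl -- for illustration only):
//
//   __m128i f(__m128 a)  { return _mm_cvttps_epi64(a); }   // VCVTTPS2QQZ128
//   __m256d g(__m256i a) { return _mm256_cvtepi64_pd(a); } // VCVTQQ2PDZ256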
// Convert Signed/Unsigned Quadword to Float
// Also Convert Signed/Unsigned Doubleword to Half
multiclass avx512_cvtqq2ps_dq2ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                                 SDPatternOperator MaskOpNode, SDPatternOperator OpNode128,
                                 SDPatternOperator OpNode128M, SDPatternOperator OpNodeRnd,
                                 AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
                                 X86SchedWriteWidths sched, Predicate prd = HasDQI> {
  let Predicates = [prd] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, _dst.info256, _src.info512, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, _dst.info256, _src.info512,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    // we need "x"/"y" suffixes in order to distinguish between 128 and 256
    // memory forms of these instructions in Asm Parser. They have the same
    // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
    // due to the same reason.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info128, null_frag,
                               null_frag, sched.XMM, _src.info128.BroadcastStr,
                               "{x}", i128mem, _src.info128.KRCWM>,
                               EVEX_V128, NotEVEX2VEXConvertible;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info256, OpNode,
                               MaskOpNode, sched.YMM, _src.info256.BroadcastStr,
                               "{y}">, EVEX_V256,
                               NotEVEX2VEXConvertible;

    // Special patterns to allow use of X86VM[SU]intToFP for masking. Instruction
    // patterns have been disabled with null_frag.
    def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.VT VR128X:$src))),
              (!cast<Instruction>(NAME # "Z128rr") VR128X:$src)>;
    def : Pat<(OpNode128M (_src.info128.VT VR128X:$src), (_dst.info128.VT VR128X:$src0),
                          _src.info128.KRCWM:$mask),
              (!cast<Instruction>(NAME # "Z128rrk") VR128X:$src0, _src.info128.KRCWM:$mask, VR128X:$src)>;
    def : Pat<(OpNode128M (_src.info128.VT VR128X:$src), _dst.info128.ImmAllZerosV,
                          _src.info128.KRCWM:$mask),
              (!cast<Instruction>(NAME # "Z128rrkz") _src.info128.KRCWM:$mask, VR128X:$src)>;

    def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.LdFrag addr:$src))),
              (!cast<Instruction>(NAME # "Z128rm") addr:$src)>;
    def : Pat<(OpNode128M (_src.info128.LdFrag addr:$src), (_dst.info128.VT VR128X:$src0),
                          _src.info128.KRCWM:$mask),
              (!cast<Instruction>(NAME # "Z128rmk") VR128X:$src0, _src.info128.KRCWM:$mask, addr:$src)>;
    def : Pat<(OpNode128M (_src.info128.LdFrag addr:$src), _dst.info128.ImmAllZerosV,
                          _src.info128.KRCWM:$mask),
              (!cast<Instruction>(NAME # "Z128rmkz") _src.info128.KRCWM:$mask, addr:$src)>;

    def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.VT (X86VBroadcastld64 addr:$src)))),
              (!cast<Instruction>(NAME # "Z128rmb") addr:$src)>;
    def : Pat<(OpNode128M (_src.info128.VT (X86VBroadcastld64 addr:$src)),
                          (_dst.info128.VT VR128X:$src0), _src.info128.KRCWM:$mask),
              (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$src0, _src.info128.KRCWM:$mask, addr:$src)>;
    def : Pat<(OpNode128M (_src.info128.VT (X86VBroadcastld64 addr:$src)),
                          _dst.info128.ImmAllZerosV, _src.info128.KRCWM:$mask),
              (!cast<Instruction>(NAME # "Z128rmbkz") _src.info128.KRCWM:$mask, addr:$src)>;
  }

  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
                  VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
                  i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
                  VK2WM:$mask, i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                  VK2WM:$mask, i64mem:$src), 0, "att">;

  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
                  VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
                  "$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
                  i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
                  VK4WM:$mask, i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
                  VK4WM:$mask, i64mem:$src), 0, "att">;
}
"Z128rrk") VR128X:$dst, 8806 VK2WM:$mask, VR128X:$src), 0, "att">; 8807 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}", 8808 (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst, 8809 VK2WM:$mask, VR128X:$src), 0, "att">; 8810 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}", 8811 (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, 8812 i64mem:$src), 0, "att">; 8813 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|" 8814 "$dst {${mask}}, ${src}{1to2}}", 8815 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst, 8816 VK2WM:$mask, i64mem:$src), 0, "att">; 8817 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|" 8818 "$dst {${mask}} {z}, ${src}{1to2}}", 8819 (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst, 8820 VK2WM:$mask, i64mem:$src), 0, "att">; 8821 8822 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}", 8823 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, 8824 VR256X:$src), 0, "att">; 8825 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|" 8826 "$dst {${mask}}, $src}", 8827 (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst, 8828 VK4WM:$mask, VR256X:$src), 0, "att">; 8829 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|" 8830 "$dst {${mask}} {z}, $src}", 8831 (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst, 8832 VK4WM:$mask, VR256X:$src), 0, "att">; 8833 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}", 8834 (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, 8835 i64mem:$src), 0, "att">; 8836 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|" 8837 "$dst {${mask}}, ${src}{1to4}}", 8838 (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst, 8839 VK4WM:$mask, i64mem:$src), 0, "att">; 8840 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|" 8841 "$dst {${mask}} {z}, ${src}{1to4}}", 8842 (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst, 8843 VK4WM:$mask, i64mem:$src), 0, "att">; 8844} 8845 8846defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", any_sint_to_fp, sint_to_fp, 8847 X86any_VSintToFP, X86VSintToFP, 8848 SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>; 8849 8850defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", any_sint_to_fp, sint_to_fp, 8851 X86VSintToFpRnd, SchedWriteCvtDQ2PS>, 8852 PS, EVEX_CD8<32, CD8VF>; 8853 8854defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86any_cvttp2si, 8855 X86cvttp2si, X86cvttp2siSAE, 8856 SchedWriteCvtPS2DQ>, XS, EVEX_CD8<32, CD8VF>; 8857 8858defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86any_cvttp2si, 8859 X86cvttp2si, X86cvttp2siSAE, 8860 SchedWriteCvtPD2DQ>, 8861 PD, VEX_W, EVEX_CD8<64, CD8VF>; 8862 8863defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86any_cvttp2ui, 8864 X86cvttp2ui, X86cvttp2uiSAE, 8865 SchedWriteCvtPS2DQ>, PS, EVEX_CD8<32, CD8VF>; 8866 8867defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86any_cvttp2ui, 8868 X86cvttp2ui, X86cvttp2uiSAE, 8869 SchedWriteCvtPD2DQ>, 8870 PS, VEX_W, EVEX_CD8<64, CD8VF>; 8871 8872defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", any_uint_to_fp, 8873 uint_to_fp, X86any_VUintToFP, X86VUintToFP, 8874 SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>; 8875 8876defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", any_uint_to_fp, 8877 uint_to_fp, X86VUintToFpRnd, 8878 SchedWriteCvtDQ2PS>, XD, EVEX_CD8<32, CD8VF>; 8879 8880defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int, X86cvtp2Int, 8881 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD, 8882 EVEX_CD8<32, CD8VF>; 8883 8884defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, 
"vcvtpd2dq", X86cvtp2Int, X86cvtp2Int, 8885 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, XD, 8886 VEX_W, EVEX_CD8<64, CD8VF>; 8887 8888defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt, X86cvtp2UInt, 8889 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, 8890 PS, EVEX_CD8<32, CD8VF>; 8891 8892defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt, X86cvtp2UInt, 8893 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W, 8894 PS, EVEX_CD8<64, CD8VF>; 8895 8896defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int, X86cvtp2Int, 8897 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, VEX_W, 8898 PD, EVEX_CD8<64, CD8VF>; 8899 8900defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int, X86cvtp2Int, 8901 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD, 8902 EVEX_CD8<32, CD8VH>; 8903 8904defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt, X86cvtp2UInt, 8905 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W, 8906 PD, EVEX_CD8<64, CD8VF>; 8907 8908defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt, X86cvtp2UInt, 8909 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, PD, 8910 EVEX_CD8<32, CD8VH>; 8911 8912defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86any_cvttp2si, 8913 X86cvttp2si, X86cvttp2siSAE, 8914 SchedWriteCvtPD2DQ>, VEX_W, 8915 PD, EVEX_CD8<64, CD8VF>; 8916 8917defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86any_cvttp2si, 8918 X86cvttp2si, X86cvttp2siSAE, 8919 SchedWriteCvtPS2DQ>, PD, 8920 EVEX_CD8<32, CD8VH>; 8921 8922defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86any_cvttp2ui, 8923 X86cvttp2ui, X86cvttp2uiSAE, 8924 SchedWriteCvtPD2DQ>, VEX_W, 8925 PD, EVEX_CD8<64, CD8VF>; 8926 8927defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86any_cvttp2ui, 8928 X86cvttp2ui, X86cvttp2uiSAE, 8929 SchedWriteCvtPS2DQ>, PD, 8930 EVEX_CD8<32, CD8VH>; 8931 8932defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", any_sint_to_fp, 8933 sint_to_fp, X86VSintToFpRnd, 8934 SchedWriteCvtDQ2PD>, VEX_W, XS, EVEX_CD8<64, CD8VF>; 8935 8936defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", any_uint_to_fp, 8937 uint_to_fp, X86VUintToFpRnd, SchedWriteCvtDQ2PD>, 8938 VEX_W, XS, EVEX_CD8<64, CD8VF>; 8939 8940defm VCVTDQ2PH : avx512_cvtqq2ps_dq2ph<0x5B, "vcvtdq2ph", any_sint_to_fp, sint_to_fp, 8941 X86any_VSintToFP, X86VMSintToFP, 8942 X86VSintToFpRnd, avx512vl_f16_info, avx512vl_i32_info, 8943 SchedWriteCvtDQ2PS, HasFP16>, 8944 T_MAP5PS, EVEX_CD8<32, CD8VF>; 8945 8946defm VCVTUDQ2PH : avx512_cvtqq2ps_dq2ph<0x7A, "vcvtudq2ph", any_uint_to_fp, uint_to_fp, 8947 X86any_VUintToFP, X86VMUintToFP, 8948 X86VUintToFpRnd, avx512vl_f16_info, avx512vl_i32_info, 8949 SchedWriteCvtDQ2PS, HasFP16>, T_MAP5XD, 8950 EVEX_CD8<32, CD8VF>; 8951 8952defm VCVTQQ2PS : avx512_cvtqq2ps_dq2ph<0x5B, "vcvtqq2ps", any_sint_to_fp, sint_to_fp, 8953 X86any_VSintToFP, X86VMSintToFP, 8954 X86VSintToFpRnd, avx512vl_f32_info, avx512vl_i64_info, 8955 SchedWriteCvtDQ2PS>, VEX_W, PS, 8956 EVEX_CD8<64, CD8VF>; 8957 8958defm VCVTUQQ2PS : avx512_cvtqq2ps_dq2ph<0x7A, "vcvtuqq2ps", any_uint_to_fp, uint_to_fp, 8959 X86any_VUintToFP, X86VMUintToFP, 8960 X86VUintToFpRnd, avx512vl_f32_info, avx512vl_i64_info, 8961 SchedWriteCvtDQ2PS>, VEX_W, XD, 8962 EVEX_CD8<64, CD8VF>; 8963 8964let Predicates = [HasVLX] in { 8965 // Special patterns to allow use of X86mcvtp2Int for masking. Instruction 8966 // patterns have been disabled with null_frag. 
let Predicates = [HasVLX] in {
  // Special patterns to allow use of X86mcvtp2Int for masking. Instruction
  // patterns have been disabled with null_frag.
  def : Pat<(v4i32 (X86cvtp2Int (v2f64 VR128X:$src))),
            (VCVTPD2DQZ128rr VR128X:$src)>;
  def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;

  def : Pat<(v4i32 (X86cvtp2Int (loadv2f64 addr:$src))),
            (VCVTPD2DQZ128rm addr:$src)>;
  def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v4i32 (X86cvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)))),
            (VCVTPD2DQZ128rmb addr:$src)>;
  def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
                          (v4i32 VR128X:$src0), VK2WM:$mask),
            (VCVTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;

  // Special patterns to allow use of X86mcvttp2si for masking. Instruction
  // patterns have been disabled with null_frag.
  def : Pat<(v4i32 (X86any_cvttp2si (v2f64 VR128X:$src))),
            (VCVTTPD2DQZ128rr VR128X:$src)>;
  def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;

  def : Pat<(v4i32 (X86any_cvttp2si (loadv2f64 addr:$src))),
            (VCVTTPD2DQZ128rm addr:$src)>;
  def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v4i32 (X86any_cvttp2si (v2f64 (X86VBroadcastld64 addr:$src)))),
            (VCVTTPD2DQZ128rmb addr:$src)>;
  def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
                          (v4i32 VR128X:$src0), VK2WM:$mask),
            (VCVTTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;

  // Special patterns to allow use of X86mcvtp2UInt for masking. Instruction
  // patterns have been disabled with null_frag.
  def : Pat<(v4i32 (X86cvtp2UInt (v2f64 VR128X:$src))),
            (VCVTPD2UDQZ128rr VR128X:$src)>;
  def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;

  def : Pat<(v4i32 (X86cvtp2UInt (loadv2f64 addr:$src))),
            (VCVTPD2UDQZ128rm addr:$src)>;
  def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v4i32 (X86cvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)))),
            (VCVTPD2UDQZ128rmb addr:$src)>;
  def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
                           (v4i32 VR128X:$src0), VK2WM:$mask),
            (VCVTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
                           v4i32x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;

  // Special patterns to allow use of X86mcvttp2ui for masking. Instruction
  // patterns have been disabled with null_frag.
  def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 VR128X:$src))),
            (VCVTTPD2UDQZ128rr VR128X:$src)>;
  def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;

  def : Pat<(v4i32 (X86any_cvttp2ui (loadv2f64 addr:$src))),
            (VCVTTPD2UDQZ128rm addr:$src)>;
  def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)))),
            (VCVTTPD2UDQZ128rmb addr:$src)>;
  def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
                          (v4i32 VR128X:$src0), VK2WM:$mask),
            (VCVTTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
}

let Predicates = [HasDQI, HasVLX] in {
  def : Pat<(v2i64 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
            (VCVTPS2QQZ128rm addr:$src)>;
  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
                     (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                     VR128X:$src0)),
            (VCVTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
                     (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                     v2i64x_info.ImmAllZerosV)),
            (VCVTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v2i64 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
            (VCVTPS2UQQZ128rm addr:$src)>;
  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
                     (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                     VR128X:$src0)),
            (VCVTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
                     (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                     v2i64x_info.ImmAllZerosV)),
            (VCVTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v2i64 (X86any_cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
            (VCVTTPS2QQZ128rm addr:$src)>;
  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
                     (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                     VR128X:$src0)),
            (VCVTTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
                     (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                     v2i64x_info.ImmAllZerosV)),
            (VCVTTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v2i64 (X86any_cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
            (VCVTTPS2UQQZ128rm addr:$src)>;
  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
                     (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                     VR128X:$src0)),
            (VCVTTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
                     (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                     v2i64x_info.ImmAllZerosV)),
            (VCVTTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
}

let Predicates = [HasVLX] in {
  def : Pat<(v2f64 (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
            (VCVTDQ2PDZ128rm addr:$src)>;
  def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
                     (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
                     VR128X:$src0)),
            (VCVTDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
                     (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
                     v2f64x_info.ImmAllZerosV)),
            (VCVTDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v2f64 (X86any_VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
            (VCVTUDQ2PDZ128rm addr:$src)>;
  def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
                     (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
                     VR128X:$src0)),
            (VCVTUDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
                     (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
                     v2f64x_info.ImmAllZerosV)),
            (VCVTUDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
}

//===----------------------------------------------------------------------===//
// Half precision conversion instructions
//===----------------------------------------------------------------------===//

let Uses = [MXCSR], mayRaiseFPException = 1 in
multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                           X86MemOperand x86memop, dag ld_dag,
                           X86FoldableSchedWrite sched> {
  defm rr : AVX512_maskable_split<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
                                  (ins _src.RC:$src), "vcvtph2ps", "$src", "$src",
                                  (X86any_cvtph2ps (_src.VT _src.RC:$src)),
                                  (X86cvtph2ps (_src.VT _src.RC:$src))>,
                                  T8PD, Sched<[sched]>;
  defm rm : AVX512_maskable_split<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
                                  (ins x86memop:$src), "vcvtph2ps", "$src", "$src",
                                  (X86any_cvtph2ps (_src.VT ld_dag)),
                                  (X86cvtph2ps (_src.VT ld_dag))>,
                                  T8PD, Sched<[sched.Folded]>;
}
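// Usage sketch (real AVX-512F intrinsic, shown for illustration only): the
// packed-f16 payload is typed __m256i because these conversions predate the
// _Float16 vector types.
//
//   __m512 up(__m256i a) { return _mm512_cvtph_ps(a); }   // VCVTPH2PSZ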
"vcvtph2ps", 9177 "{sae}, $src", "$src, {sae}", 9178 (X86cvtph2psSAE (_src.VT _src.RC:$src))>, 9179 T8PD, EVEX_B, Sched<[sched]>; 9180} 9181 9182let Predicates = [HasAVX512] in 9183 defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem, 9184 (load addr:$src), WriteCvtPH2PSZ>, 9185 avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSZ>, 9186 EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>; 9187 9188let Predicates = [HasVLX] in { 9189 defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem, 9190 (load addr:$src), WriteCvtPH2PSY>, EVEX, EVEX_V256, 9191 EVEX_CD8<32, CD8VH>; 9192 defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem, 9193 (bitconvert (v2i64 (X86vzload64 addr:$src))), 9194 WriteCvtPH2PS>, EVEX, EVEX_V128, 9195 EVEX_CD8<32, CD8VH>; 9196 9197 // Pattern match vcvtph2ps of a scalar i64 load. 9198 def : Pat<(v4f32 (X86any_cvtph2ps (v8i16 (bitconvert 9199 (v2i64 (scalar_to_vector (loadi64 addr:$src))))))), 9200 (VCVTPH2PSZ128rm addr:$src)>; 9201} 9202 9203multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src, 9204 X86MemOperand x86memop, SchedWrite RR, SchedWrite MR> { 9205let ExeDomain = GenericDomain, Uses = [MXCSR], mayRaiseFPException = 1 in { 9206 def rr : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst), 9207 (ins _src.RC:$src1, i32u8imm:$src2), 9208 "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", 9209 [(set _dest.RC:$dst, 9210 (X86any_cvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>, 9211 Sched<[RR]>; 9212 let Constraints = "$src0 = $dst" in 9213 def rrk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst), 9214 (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2), 9215 "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", 9216 [(set _dest.RC:$dst, 9217 (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2), 9218 _dest.RC:$src0, _src.KRCWM:$mask))]>, 9219 Sched<[RR]>, EVEX_K; 9220 def rrkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst), 9221 (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2), 9222 "vcvtps2ph\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}", 9223 [(set _dest.RC:$dst, 9224 (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2), 9225 _dest.ImmAllZerosV, _src.KRCWM:$mask))]>, 9226 Sched<[RR]>, EVEX_KZ; 9227 let hasSideEffects = 0, mayStore = 1 in { 9228 def mr : AVX512AIi8<0x1D, MRMDestMem, (outs), 9229 (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2), 9230 "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, 9231 Sched<[MR]>; 9232 def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs), 9233 (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2), 9234 "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", []>, 9235 EVEX_K, Sched<[MR]>, NotMemoryFoldable; 9236 } 9237} 9238} 9239 9240multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src, 9241 SchedWrite Sched> { 9242 let hasSideEffects = 0, Uses = [MXCSR] in { 9243 def rrb : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst), 9244 (ins _src.RC:$src1, i32u8imm:$src2), 9245 "vcvtps2ph\t{$src2, {sae}, $src1, $dst|$dst, $src1, {sae}, $src2}", 9246 [(set _dest.RC:$dst, 9247 (X86cvtps2phSAE (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>, 9248 EVEX_B, Sched<[Sched]>; 9249 let Constraints = "$src0 = $dst" in 9250 def rrbk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst), 9251 (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2), 9252 "vcvtps2ph\t{$src2, {sae}, $src1, $dst {${mask}}|$dst {${mask}}, $src1, {sae}, 
$src2}", 9253 [(set _dest.RC:$dst, 9254 (X86mcvtps2phSAE (_src.VT _src.RC:$src1), (i32 timm:$src2), 9255 _dest.RC:$src0, _src.KRCWM:$mask))]>, 9256 EVEX_B, Sched<[Sched]>, EVEX_K; 9257 def rrbkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst), 9258 (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2), 9259 "vcvtps2ph\t{$src2, {sae}, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, {sae}, $src2}", 9260 [(set _dest.RC:$dst, 9261 (X86mcvtps2phSAE (_src.VT _src.RC:$src1), (i32 timm:$src2), 9262 _dest.ImmAllZerosV, _src.KRCWM:$mask))]>, 9263 EVEX_B, Sched<[Sched]>, EVEX_KZ; 9264} 9265} 9266 9267let Predicates = [HasAVX512] in { 9268 defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem, 9269 WriteCvtPS2PHZ, WriteCvtPS2PHZSt>, 9270 avx512_cvtps2ph_sae<v16i16x_info, v16f32_info, WriteCvtPS2PHZ>, 9271 EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>; 9272 9273 def : Pat<(store (v16i16 (X86any_cvtps2ph VR512:$src1, timm:$src2)), addr:$dst), 9274 (VCVTPS2PHZmr addr:$dst, VR512:$src1, timm:$src2)>; 9275} 9276 9277let Predicates = [HasVLX] in { 9278 defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem, 9279 WriteCvtPS2PHY, WriteCvtPS2PHYSt>, 9280 EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>; 9281 defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem, 9282 WriteCvtPS2PH, WriteCvtPS2PHSt>, 9283 EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>; 9284 9285 def : Pat<(store (f64 (extractelt 9286 (bc_v2f64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))), 9287 (iPTR 0))), addr:$dst), 9288 (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>; 9289 def : Pat<(store (i64 (extractelt 9290 (bc_v2i64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))), 9291 (iPTR 0))), addr:$dst), 9292 (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>; 9293 def : Pat<(store (v8i16 (X86any_cvtps2ph VR256X:$src1, timm:$src2)), addr:$dst), 9294 (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, timm:$src2)>; 9295} 9296 9297// Unordered/Ordered scalar fp compare with Sae and set EFLAGS 9298multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _, 9299 string OpcodeStr, Domain d, 9300 X86FoldableSchedWrite sched = WriteFComX> { 9301 let ExeDomain = d, hasSideEffects = 0, Uses = [MXCSR] in 9302 def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2), 9303 !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"), []>, 9304 EVEX, EVEX_B, VEX_LIG, EVEX_V128, Sched<[sched]>; 9305} 9306 9307let Defs = [EFLAGS], Predicates = [HasAVX512] in { 9308 defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", SSEPackedSingle>, 9309 AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>; 9310 defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", SSEPackedDouble>, 9311 AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>; 9312 defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", SSEPackedSingle>, 9313 AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>; 9314 defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", SSEPackedDouble>, 9315 AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>; 9316} 9317 9318let Defs = [EFLAGS], Predicates = [HasAVX512] in { 9319 defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86any_fcmp, f32, f32mem, loadf32, 9320 "ucomiss", SSEPackedSingle>, PS, EVEX, VEX_LIG, 9321 EVEX_CD8<32, CD8VT1>; 9322 defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86any_fcmp, f64, f64mem, loadf64, 9323 "ucomisd", SSEPackedDouble>, PD, EVEX, 9324 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; 9325 defm VCOMISSZ : sse12_ord_cmp<0x2F, FR32X, X86strict_fcmps, f32, f32mem, loadf32, 9326 "comiss", SSEPackedSingle>, 

// Unordered/Ordered scalar fp compare with Sae and set EFLAGS
multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
                              string OpcodeStr, Domain d,
                              X86FoldableSchedWrite sched = WriteFComX> {
  let ExeDomain = d, hasSideEffects = 0, Uses = [MXCSR] in
  def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
                  !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"), []>,
                  EVEX, EVEX_B, VEX_LIG, EVEX_V128, Sched<[sched]>;
}

let Defs = [EFLAGS], Predicates = [HasAVX512] in {
  defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", SSEPackedSingle>,
                                      AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
  defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", SSEPackedDouble>,
                                      AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
  defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", SSEPackedSingle>,
                                     AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
  defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", SSEPackedDouble>,
                                     AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
}

let Defs = [EFLAGS], Predicates = [HasAVX512] in {
  defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86any_fcmp, f32, f32mem, loadf32,
                                 "ucomiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
                                 EVEX_CD8<32, CD8VT1>;
  defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86any_fcmp, f64, f64mem, loadf64,
                                 "ucomisd", SSEPackedDouble>, PD, EVEX,
                                 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
  defm VCOMISSZ : sse12_ord_cmp<0x2F, FR32X, X86strict_fcmps, f32, f32mem, loadf32,
                                "comiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
                                EVEX_CD8<32, CD8VT1>;
  defm VCOMISDZ : sse12_ord_cmp<0x2F, FR64X, X86strict_fcmps, f64, f64mem, loadf64,
                                "comisd", SSEPackedDouble>, PD, EVEX,
                                VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
  let isCodeGenOnly = 1 in {
    defm VUCOMISSZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
                      sse_load_f32, "ucomiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
                      EVEX_CD8<32, CD8VT1>;
    defm VUCOMISDZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
                      sse_load_f64, "ucomisd", SSEPackedDouble>, PD, EVEX,
                      VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;

    defm VCOMISSZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
                     sse_load_f32, "comiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
                     EVEX_CD8<32, CD8VT1>;
    defm VCOMISDZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
                     sse_load_f64, "comisd", SSEPackedDouble>, PD, EVEX,
                     VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
  }
}
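
// The {sae} forms take both operands in registers and suppress all FP
// exceptions, e.g. (AT&T syntax, illustrative):
//   vucomiss {sae}, %xmm1, %xmm0
// compares like vucomiss but never raises a floating-point exception.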

let Defs = [EFLAGS], Predicates = [HasFP16] in {
  defm VUCOMISHZ : avx512_ord_cmp_sae<0x2E, v8f16x_info, "vucomish",
                                      SSEPackedSingle>, AVX512PSIi8Base, T_MAP5PS,
                                      EVEX_CD8<16, CD8VT1>;
  defm VCOMISHZ : avx512_ord_cmp_sae<0x2F, v8f16x_info, "vcomish",
                                     SSEPackedSingle>, AVX512PSIi8Base, T_MAP5PS,
                                     EVEX_CD8<16, CD8VT1>;
  defm VUCOMISHZ : sse12_ord_cmp<0x2E, FR16X, X86any_fcmp, f16, f16mem, loadf16,
                                 "ucomish", SSEPackedSingle>, T_MAP5PS, EVEX,
                                 VEX_LIG, EVEX_CD8<16, CD8VT1>;
  defm VCOMISHZ : sse12_ord_cmp<0x2F, FR16X, X86strict_fcmps, f16, f16mem, loadf16,
                                "comish", SSEPackedSingle>, T_MAP5PS, EVEX,
                                VEX_LIG, EVEX_CD8<16, CD8VT1>;
  let isCodeGenOnly = 1 in {
    defm VUCOMISHZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v8f16, shmem,
                                       sse_load_f16, "ucomish", SSEPackedSingle>,
                                       T_MAP5PS, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>;

    defm VCOMISHZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v8f16, shmem,
                                      sse_load_f16, "comish", SSEPackedSingle>,
                                      T_MAP5PS, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>;
  }
}

/// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd, rcpsh, rsqrtsh
multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _,
                         Predicate prd = HasAVX512> {
  let Predicates = [prd], ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
                           EVEX_4V, VEX_LIG, Sched<[sched]>;
  defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (OpNode (_.VT _.RC:$src1),
                                 (_.ScalarIntMemFrags addr:$src2))>, EVEX_4V, VEX_LIG,
                         Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}

defm VRCPSHZ : avx512_fp14_s<0x4D, "vrcpsh", X86rcp14s, SchedWriteFRcp.Scl,
                             f16x_info, HasFP16>, EVEX_CD8<16, CD8VT1>,
                             T_MAP6PD;
defm VRSQRTSHZ : avx512_fp14_s<0x4F, "vrsqrtsh", X86rsqrt14s,
                               SchedWriteFRsqrt.Scl, f16x_info, HasFP16>,
                               EVEX_CD8<16, CD8VT1>, T_MAP6PD;
let Uses = [MXCSR] in {
defm VRCP14SSZ : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SchedWriteFRcp.Scl,
                               f32x_info>, EVEX_CD8<32, CD8VT1>,
                               T8PD;
defm VRCP14SDZ : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SchedWriteFRcp.Scl,
                               f64x_info>, VEX_W, EVEX_CD8<64, CD8VT1>,
                               T8PD;
defm VRSQRT14SSZ : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s,
                                 SchedWriteFRsqrt.Scl, f32x_info>,
                                 EVEX_CD8<32, CD8VT1>, T8PD;
defm VRSQRT14SDZ : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s,
                                 SchedWriteFRsqrt.Scl, f64x_info>, VEX_W,
                                 EVEX_CD8<64, CD8VT1>, T8PD;
}
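
// rcp14/rsqrt14 return approximations with a relative error of at most 2^-14,
// e.g. (illustrative, AVX512F intrinsics):
//   __m128 r = _mm_rcp14_ss(a, b);  // r[0] ~= 1.0f / b[0], r[1..3] = a[1..3]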

/// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                          (ins _.RC:$src), OpcodeStr, "$src", "$src",
                          (_.VT (OpNode _.RC:$src))>, EVEX, T8PD,
                          Sched<[sched]>;
  defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                          (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
                          (OpNode (_.VT
                            (bitconvert (_.LdFrag addr:$src))))>, EVEX, T8PD,
                          Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                           (ins _.ScalarMemOp:$src), OpcodeStr,
                           "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
                           (OpNode (_.VT
                             (_.BroadcastLdFrag addr:$src)))>,
                           EVEX, T8PD, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched> {
  let Uses = [MXCSR] in {
  defm 14PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"), OpNode, sched.ZMM,
                             v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm 14PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"), OpNode, sched.ZMM,
                             v8f64_info>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
  }
  let Predicates = [HasFP16] in
  defm PHZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"), OpNode, sched.ZMM,
                           v32f16_info>, EVEX_V512, T_MAP6PD, EVEX_CD8<16, CD8VF>;

  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX], Uses = [MXCSR] in {
    defm 14PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"),
                                  OpNode, sched.XMM, v4f32x_info>,
                                  EVEX_V128, EVEX_CD8<32, CD8VF>;
    defm 14PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"),
                                  OpNode, sched.YMM, v8f32x_info>,
                                  EVEX_V256, EVEX_CD8<32, CD8VF>;
    defm 14PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"),
                                  OpNode, sched.XMM, v2f64x_info>,
                                  EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
    defm 14PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"),
                                  OpNode, sched.YMM, v4f64x_info>,
                                  EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
  }
  let Predicates = [HasFP16, HasVLX] in {
    defm PHZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"),
                                OpNode, sched.XMM, v8f16x_info>,
                                EVEX_V128, T_MAP6PD, EVEX_CD8<16, CD8VF>;
    defm PHZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"),
                                OpNode, sched.YMM, v16f16x_info>,
                                EVEX_V256, T_MAP6PD, EVEX_CD8<16, CD8VF>;
  }
}

defm VRSQRT : avx512_fp14_p_vl_all<0x4E, "vrsqrt", X86rsqrt14, SchedWriteFRsqrt>;
defm VRCP : avx512_fp14_p_vl_all<0x4C, "vrcp", X86rcp14, SchedWriteFRcp>;
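
// The defm name concatenation above yields, e.g., VRCP14PSZ / VRCP14PSZ128 /
// VRCP14PSZ256 for the f32 forms and VRCPPHZ* for the FP16 forms (a naming
// sketch; the actual record names come from NAME # the inner defm suffixes).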

/// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         SDNode OpNode, SDNode OpNodeSAE,
                         X86FoldableSchedWrite sched> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
  defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
                           Sched<[sched]>, SIMD_EXC;

  defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                            "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                            (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
                            EVEX_B, Sched<[sched]>;

  defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2))>,
                         Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  }
}

multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
                        SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
  defm SSZ : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, OpNodeSAE,
                           sched>, EVEX_CD8<32, CD8VT1>, VEX_LIG, T8PD, EVEX_4V;
  defm SDZ : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, OpNodeSAE,
                           sched>, EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W, T8PD, EVEX_4V;
}

multiclass avx512_vgetexpsh<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
  let Predicates = [HasFP16] in
  defm SHZ : avx512_fp28_s<opc, OpcodeStr#"sh", f16x_info, OpNode, OpNodeSAE, sched>,
             EVEX_CD8<16, CD8VT1>, T_MAP6PD, EVEX_4V;
}

let Predicates = [HasERI] in {
  defm VRCP28 : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, X86rcp28SAEs,
                             SchedWriteFRcp.Scl>;
  defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, X86rsqrt28SAEs,
                               SchedWriteFRsqrt.Scl>;
}

defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
                            SchedWriteFRnd.Scl>,
               avx512_vgetexpsh<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
                                SchedWriteFRnd.Scl>;
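
// The AVX512ER (HasERI) rcp28/rsqrt28 variants tighten the approximation to a
// relative error of at most 2^-28, versus 2^-14 for rcp14/rsqrt14 above.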

/// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd

multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         SDNode OpNode, X86FoldableSchedWrite sched> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src), OpcodeStr, "$src", "$src",
                         (OpNode (_.VT _.RC:$src))>,
                         Sched<[sched]>;

  defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
                         (OpNode (_.VT
                             (bitconvert (_.LdFrag addr:$src))))>,
                         Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.ScalarMemOp:$src), OpcodeStr,
                         "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
                         (OpNode (_.VT
                                  (_.BroadcastLdFrag addr:$src)))>,
                         EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
multiclass avx512_fp28_p_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                             SDNode OpNode, X86FoldableSchedWrite sched> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
  defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src), OpcodeStr,
                         "{sae}, $src", "$src, {sae}",
                         (OpNode (_.VT _.RC:$src))>,
                         EVEX_B, Sched<[sched]>;
}

multiclass avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
                      SDNode OpNodeSAE, X86SchedWriteWidths sched> {
  defm PSZ : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>,
             avx512_fp28_p_sae<opc, OpcodeStr#"ps", v16f32_info, OpNodeSAE, sched.ZMM>,
             T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>,
             avx512_fp28_p_sae<opc, OpcodeStr#"pd", v8f64_info, OpNodeSAE, sched.ZMM>,
             T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
}

multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode, X86SchedWriteWidths sched> {
  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode,
                                sched.XMM>,
                                EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode,
                                sched.YMM>,
                                EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode,
                                sched.XMM>,
                                EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode,
                                sched.YMM>,
                                EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
  }
}

multiclass avx512_vgetexp_fp16<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               SDNode OpNodeSAE, X86SchedWriteWidths sched> {
  let Predicates = [HasFP16] in
  defm PHZ : avx512_fp28_p<opc, OpcodeStr#"ph", v32f16_info, OpNode, sched.ZMM>,
             avx512_fp28_p_sae<opc, OpcodeStr#"ph", v32f16_info, OpNodeSAE, sched.ZMM>,
             T_MAP6PD, EVEX_V512, EVEX_CD8<16, CD8VF>;
  let Predicates = [HasFP16, HasVLX] in {
    defm PHZ128 : avx512_fp28_p<opc, OpcodeStr#"ph", v8f16x_info, OpNode, sched.XMM>,
                                EVEX_V128, T_MAP6PD, EVEX_CD8<16, CD8VF>;
    defm PHZ256 : avx512_fp28_p<opc, OpcodeStr#"ph", v16f16x_info, OpNode, sched.YMM>,
                                EVEX_V256, T_MAP6PD, EVEX_CD8<16, CD8VF>;
  }
}
let Predicates = [HasERI] in {
  defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, X86rsqrt28SAE,
                             SchedWriteFRsqrt>, EVEX;
  defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28, X86rcp28SAE,
                           SchedWriteFRcp>, EVEX;
  defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2, X86exp2SAE,
                          SchedWriteFAdd>, EVEX;
}
defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
                          SchedWriteFRnd>,
               avx512_vgetexp_fp16<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
                                   SchedWriteFRnd>,
               avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexp,
                                        SchedWriteFRnd>, EVEX;

multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
                                    X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc",
                         (_.VT (X86fsqrtRnd _.RC:$src, (i32 timm:$rc)))>,
                         EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
}

multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm r: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src), OpcodeStr, "$src", "$src",
                         (_.VT (any_fsqrt _.RC:$src)),
                         (_.VT (fsqrt _.RC:$src))>, EVEX,
                         Sched<[sched]>;
  defm m: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
                         (any_fsqrt (_.VT (_.LdFrag addr:$src))),
                         (fsqrt (_.VT (_.LdFrag addr:$src)))>, EVEX,
                         Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm mb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.ScalarMemOp:$src), OpcodeStr,
                         "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
                         (any_fsqrt (_.VT (_.BroadcastLdFrag addr:$src))),
                         (fsqrt (_.VT (_.BroadcastLdFrag addr:$src)))>,
                         EVEX, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
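
// The rb form in avx512_sqrt_packed_round carries an embedded rounding-mode
// operand (EVEX_RC), so the override precedes the source in AT&T syntax, e.g.
// (illustrative): vsqrtps {ru-sae}, %zmm0, %zmm1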

let Uses = [MXCSR], mayRaiseFPException = 1 in
multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
                                  X86SchedWriteSizes sched> {
  let Predicates = [HasFP16] in
  defm PHZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
                                sched.PH.ZMM, v32f16_info>,
                                EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>;
  let Predicates = [HasFP16, HasVLX] in {
    defm PHZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
                                     sched.PH.XMM, v8f16x_info>,
                                     EVEX_V128, T_MAP5PS, EVEX_CD8<16, CD8VF>;
    defm PHZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
                                     sched.PH.YMM, v16f16x_info>,
                                     EVEX_V256, T_MAP5PS, EVEX_CD8<16, CD8VF>;
  }
  defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
                                sched.PS.ZMM, v16f32_info>,
                                EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
                                sched.PD.ZMM, v8f64_info>,
                                EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
                                     sched.PS.XMM, v4f32x_info>,
                                     EVEX_V128, PS, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
                                     sched.PS.YMM, v8f32x_info>,
                                     EVEX_V256, PS, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
                                     sched.PD.XMM, v2f64x_info>,
                                     EVEX_V128, VEX_W, PD, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
                                     sched.PD.YMM, v4f64x_info>,
                                     EVEX_V256, VEX_W, PD, EVEX_CD8<64, CD8VF>;
  }
}

let Uses = [MXCSR] in
multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
                                        X86SchedWriteSizes sched> {
  let Predicates = [HasFP16] in
  defm PHZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ph"),
                                      sched.PH.ZMM, v32f16_info>,
                                      EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>;
  defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"),
                                      sched.PS.ZMM, v16f32_info>,
                                      EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"),
                                      sched.PD.ZMM, v8f64_info>,
                                      EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
}

multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
                              X86VectorVTInfo _, string Name, Predicate prd = HasAVX512> {
  let ExeDomain = _.ExeDomain, Predicates = [prd] in {
    defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (X86fsqrts (_.VT _.RC:$src1),
                                    (_.VT _.RC:$src2))>,
                         Sched<[sched]>, SIMD_EXC;
    defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (X86fsqrts (_.VT _.RC:$src1),
                                    (_.ScalarIntMemFrags addr:$src2))>,
                         Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
    let Uses = [MXCSR] in
    defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
                         "$rc, $src2, $src1", "$src1, $src2, $rc",
                         (X86fsqrtRnds (_.VT _.RC:$src1),
                                       (_.VT _.RC:$src2),
                                       (i32 timm:$rc))>,
                         EVEX_B, EVEX_RC, Sched<[sched]>;

    let isCodeGenOnly = 1, hasSideEffects = 0 in {
      def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
                (ins _.FRC:$src1, _.FRC:$src2),
                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
                Sched<[sched]>, SIMD_EXC;
      let mayLoad = 1 in
      def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
                (ins _.FRC:$src1, _.ScalarMemOp:$src2),
                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
                Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
    }
  }

  let Predicates = [prd] in {
    def : Pat<(_.EltVT (any_fsqrt _.FRC:$src)),
              (!cast<Instruction>(Name#Zr)
                  (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;
  }

  let Predicates = [prd, OptForSize] in {
    def : Pat<(_.EltVT (any_fsqrt (load addr:$src))),
              (!cast<Instruction>(Name#Zm)
                  (_.EltVT (IMPLICIT_DEF)), addr:$src)>;
  }
}

multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr,
                                  X86SchedWriteSizes sched> {
  defm SHZ : avx512_sqrt_scalar<opc, OpcodeStr#"sh", sched.PH.Scl, f16x_info, NAME#"SH", HasFP16>,
                                EVEX_CD8<16, CD8VT1>, EVEX_4V, T_MAP5XS;
  defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", sched.PS.Scl, f32x_info, NAME#"SS">,
                                EVEX_CD8<32, CD8VT1>, EVEX_4V, XS;
  defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", sched.PD.Scl, f64x_info, NAME#"SD">,
                                EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W;
}

defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", SchedWriteFSqrtSizes>,
             avx512_sqrt_packed_all_round<0x51, "vsqrt", SchedWriteFSqrtSizes>;

defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, VEX_LIG;

multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
                         "$src3, $src2, $src1", "$src1, $src2, $src3",
                         (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                             (i32 timm:$src3)))>,
                         Sched<[sched]>, SIMD_EXC;

    let Uses = [MXCSR] in
    defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
                         "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
                         (_.VT (X86RndScalesSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                                (i32 timm:$src3)))>, EVEX_B,
                         Sched<[sched]>;

    defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
                         OpcodeStr,
                         "$src3, $src2, $src1", "$src1, $src2, $src3",
                         (_.VT (X86RndScales _.RC:$src1,
                                             (_.ScalarIntMemFrags addr:$src2), (i32 timm:$src3)))>,
                         Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;

    let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in {
      def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
                (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
                OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                []>, Sched<[sched]>, SIMD_EXC;

      let mayLoad = 1 in
      def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
                (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
                OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                []>, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
    }
  }

  let Predicates = [HasAVX512] in {
    def : Pat<(X86any_VRndScale _.FRC:$src1, timm:$src2),
              (_.EltVT (!cast<Instruction>(NAME#r) (_.EltVT (IMPLICIT_DEF)),
                        _.FRC:$src1, timm:$src2))>;
  }

  let Predicates = [HasAVX512, OptForSize] in {
    def : Pat<(X86any_VRndScale (_.ScalarLdFrag addr:$src1), timm:$src2),
              (_.EltVT (!cast<Instruction>(NAME#m) (_.EltVT (IMPLICIT_DEF)),
                        addr:$src1, timm:$src2))>;
  }
}

let Predicates = [HasFP16] in
defm VRNDSCALESHZ : avx512_rndscale_scalar<0x0A, "vrndscalesh",
                                           SchedWriteFRnd.Scl, f16x_info>,
                                           AVX512PSIi8Base, TA, EVEX_4V,
                                           EVEX_CD8<16, CD8VT1>;

defm VRNDSCALESSZ : avx512_rndscale_scalar<0x0A, "vrndscaless",
                                           SchedWriteFRnd.Scl, f32x_info>,
                                           AVX512AIi8Base, EVEX_4V, VEX_LIG,
                                           EVEX_CD8<32, CD8VT1>;

defm VRNDSCALESDZ : avx512_rndscale_scalar<0x0B, "vrndscalesd",
                                           SchedWriteFRnd.Scl, f64x_info>,
                                           VEX_W, AVX512AIi8Base, EVEX_4V, VEX_LIG,
                                           EVEX_CD8<64, CD8VT1>;

multiclass avx512_masked_scalar<SDNode OpNode, string OpcPrefix, SDNode Move,
                                dag Mask, X86VectorVTInfo _, PatLeaf ZeroFP,
                                dag OutMask, Predicate BasePredicate> {
  let Predicates = [BasePredicate] in {
    def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask,
                                (OpNode (extractelt _.VT:$src2, (iPTR 0))),
                                (extractelt _.VT:$dst, (iPTR 0))))),
              (!cast<Instruction>("V"#OpcPrefix#r_Intk)
                _.VT:$dst, OutMask, _.VT:$src2, _.VT:$src1)>;

    def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask,
                                (OpNode (extractelt _.VT:$src2, (iPTR 0))),
                                ZeroFP))),
              (!cast<Instruction>("V"#OpcPrefix#r_Intkz)
                OutMask, _.VT:$src2, _.VT:$src1)>;
  }
}

defm : avx512_masked_scalar<fsqrt, "SQRTSHZ", X86Movsh,
                            (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v8f16x_info,
                            fp16imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasFP16>;
defm : avx512_masked_scalar<fsqrt, "SQRTSSZ", X86Movss,
                            (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v4f32x_info,
                            fp32imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
defm : avx512_masked_scalar<fsqrt, "SQRTSDZ", X86Movsd,
                            (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v2f64x_info,
                            fp64imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
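
// These patterns let a masked scalar sqrt written with a GPR-based mask, e.g.
// (illustrative) _mm_mask_sqrt_ss(src, k, a, b), select directly to
// "vsqrtss %xmm2, %xmm1, %xmm0 {%k1}" instead of a separate blend.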

//===----------------------------------------------------------------------===//
// Integer truncate and extend operations
//===----------------------------------------------------------------------===//

// PatFrags that contain a select and a truncate op. They take operands in the
// same order as X86vmtrunc, X86vmtruncs, X86vmtruncus. This allows us to pass
// either to the multiclasses.
def select_trunc : PatFrag<(ops node:$src, node:$src0, node:$mask),
                           (vselect_mask node:$mask,
                                         (trunc node:$src), node:$src0)>;
def select_truncs : PatFrag<(ops node:$src, node:$src0, node:$mask),
                            (vselect_mask node:$mask,
                                          (X86vtruncs node:$src), node:$src0)>;
def select_truncus : PatFrag<(ops node:$src, node:$src0, node:$mask),
                             (vselect_mask node:$mask,
                                           (X86vtruncus node:$src), node:$src0)>;

multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               SDPatternOperator MaskNode,
                               X86FoldableSchedWrite sched, X86VectorVTInfo SrcInfo,
                               X86VectorVTInfo DestInfo, X86MemOperand x86memop> {
  let ExeDomain = DestInfo.ExeDomain in {
  def rr : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
             (ins SrcInfo.RC:$src),
             OpcodeStr # "\t{$src, $dst|$dst, $src}",
             [(set DestInfo.RC:$dst,
                   (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src))))]>,
             EVEX, Sched<[sched]>;
  let Constraints = "$src0 = $dst" in
  def rrk : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
             (ins DestInfo.RC:$src0, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
             OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
             [(set DestInfo.RC:$dst,
                   (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
                             (DestInfo.VT DestInfo.RC:$src0),
                             SrcInfo.KRCWM:$mask))]>,
             EVEX, EVEX_K, Sched<[sched]>;
  def rrkz : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
             (ins SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
             OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
             [(set DestInfo.RC:$dst,
                   (DestInfo.VT (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
                                 DestInfo.ImmAllZerosV, SrcInfo.KRCWM:$mask)))]>,
             EVEX, EVEX_KZ, Sched<[sched]>;
  }

  let mayStore = 1, hasSideEffects = 0, ExeDomain = DestInfo.ExeDomain in {
    def mr : AVX512XS8I<opc, MRMDestMem, (outs),
               (ins x86memop:$dst, SrcInfo.RC:$src),
               OpcodeStr # "\t{$src, $dst|$dst, $src}", []>,
               EVEX, Sched<[sched.Folded]>;

    def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
               (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
               OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", []>,
               EVEX, EVEX_K, Sched<[sched.Folded]>, NotMemoryFoldable;
  } //mayStore = 1, hasSideEffects = 0
}

multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
                                    PatFrag truncFrag, PatFrag mtruncFrag,
                                    string Name> {

  def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst),
            (!cast<Instruction>(Name#SrcInfo.ZSuffix#mr)
                addr:$dst, SrcInfo.RC:$src)>;

  def : Pat<(mtruncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst,
                        SrcInfo.KRCWM:$mask),
            (!cast<Instruction>(Name#SrcInfo.ZSuffix#mrk)
                addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
}

multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128,
                        SDNode OpNode256, SDNode OpNode512,
                        SDPatternOperator MaskNode128,
                        SDPatternOperator MaskNode256,
                        SDPatternOperator MaskNode512,
                        X86SchedWriteWidths sched,
                        AVX512VLVectorVTInfo VTSrcInfo,
                        X86VectorVTInfo DestInfoZ128,
                        X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
                        X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
                        X86MemOperand x86memopZ, PatFrag truncFrag,
                        PatFrag mtruncFrag, Predicate prd = HasAVX512> {

  let Predicates = [HasVLX, prd] in {
    defm Z128: avx512_trunc_common<opc, OpcodeStr, OpNode128, MaskNode128, sched.XMM,
                                   VTSrcInfo.info128, DestInfoZ128, x86memopZ128>,
               avx512_trunc_mr_lowering<VTSrcInfo.info128, truncFrag,
                                        mtruncFrag, NAME>, EVEX_V128;

    defm Z256: avx512_trunc_common<opc, OpcodeStr, OpNode256, MaskNode256, sched.YMM,
                                   VTSrcInfo.info256, DestInfoZ256, x86memopZ256>,
               avx512_trunc_mr_lowering<VTSrcInfo.info256, truncFrag,
                                        mtruncFrag, NAME>, EVEX_V256;
  }
  let Predicates = [prd] in
  defm Z: avx512_trunc_common<opc, OpcodeStr, OpNode512, MaskNode512, sched.ZMM,
                              VTSrcInfo.info512, DestInfoZ, x86memopZ>,
          avx512_trunc_mr_lowering<VTSrcInfo.info512, truncFrag,
                                   mtruncFrag, NAME>, EVEX_V512;
}
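
// For example (illustrative, AVX512F intrinsics), the masked store form
//   _mm512_mask_cvtepi64_storeu_epi8(p, k, a);
// reaches the mrk record through the mtruncFrag pattern and emits
//   vpmovqb %zmm0, (%rdi) {%k1}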

multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr,
                           X86SchedWriteWidths sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, InVecNode,
                          InVecMaskNode, InVecMaskNode, InVecMaskNode, sched,
                          avx512vl_i64_info, v16i8x_info, v16i8x_info,
                          v16i8x_info, i16mem, i32mem, i64mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<8, CD8VO>;
}

multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86SchedWriteWidths sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
                          InVecMaskNode, InVecMaskNode, MaskNode, sched,
                          avx512vl_i64_info, v8i16x_info, v8i16x_info,
                          v8i16x_info, i32mem, i64mem, i128mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<16, CD8VQ>;
}

multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86SchedWriteWidths sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
                          InVecMaskNode, MaskNode, MaskNode, sched,
                          avx512vl_i64_info, v4i32x_info, v4i32x_info,
                          v8i32x_info, i64mem, i128mem, i256mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<32, CD8VH>;
}

multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86SchedWriteWidths sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
                          InVecMaskNode, InVecMaskNode, MaskNode, sched,
                          avx512vl_i32_info, v16i8x_info, v16i8x_info,
                          v16i8x_info, i32mem, i64mem, i128mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<8, CD8VQ>;
}

multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86SchedWriteWidths sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
                          InVecMaskNode, MaskNode, MaskNode, sched,
                          avx512vl_i32_info, v8i16x_info, v8i16x_info,
                          v16i16x_info, i64mem, i128mem, i256mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<16, CD8VH>;
}

multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86SchedWriteWidths sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
                          InVecMaskNode, MaskNode, MaskNode, sched,
                          avx512vl_i16_info, v16i8x_info, v16i8x_info,
                          v32i8x_info, i64mem, i128mem, i256mem, StoreNode,
                          MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>;
}

defm VPMOVQB : avx512_trunc_qb<0x32, "vpmovqb",
                               SchedWriteVecTruncate, truncstorevi8,
                               masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
defm VPMOVSQB : avx512_trunc_qb<0x22, "vpmovsqb",
                                SchedWriteVecTruncate, truncstore_s_vi8,
                                masked_truncstore_s_vi8, X86vtruncs,
                                X86vmtruncs>;
defm VPMOVUSQB : avx512_trunc_qb<0x12, "vpmovusqb",
                                 SchedWriteVecTruncate, truncstore_us_vi8,
                                 masked_truncstore_us_vi8, X86vtruncus, X86vmtruncus>;

defm VPMOVQW : avx512_trunc_qw<0x34, "vpmovqw", trunc, select_trunc,
                               SchedWriteVecTruncate, truncstorevi16,
                               masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
defm VPMOVSQW : avx512_trunc_qw<0x24, "vpmovsqw", X86vtruncs, select_truncs,
                                SchedWriteVecTruncate, truncstore_s_vi16,
                                masked_truncstore_s_vi16, X86vtruncs,
                                X86vmtruncs>;
defm VPMOVUSQW : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus,
                                 select_truncus, SchedWriteVecTruncate,
                                 truncstore_us_vi16, masked_truncstore_us_vi16,
                                 X86vtruncus, X86vmtruncus>;

defm VPMOVQD : avx512_trunc_qd<0x35, "vpmovqd", trunc, select_trunc,
                               SchedWriteVecTruncate, truncstorevi32,
                               masked_truncstorevi32, X86vtrunc, X86vmtrunc>;
defm VPMOVSQD : avx512_trunc_qd<0x25, "vpmovsqd", X86vtruncs, select_truncs,
                                SchedWriteVecTruncate, truncstore_s_vi32,
                                masked_truncstore_s_vi32, X86vtruncs,
                                X86vmtruncs>;
defm VPMOVUSQD : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus,
                                 select_truncus, SchedWriteVecTruncate,
                                 truncstore_us_vi32, masked_truncstore_us_vi32,
                                 X86vtruncus, X86vmtruncus>;

defm VPMOVDB : avx512_trunc_db<0x31, "vpmovdb", trunc, select_trunc,
                               SchedWriteVecTruncate, truncstorevi8,
                               masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
defm VPMOVSDB : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, select_truncs,
                                SchedWriteVecTruncate, truncstore_s_vi8,
                                masked_truncstore_s_vi8, X86vtruncs,
                                X86vmtruncs>;
defm VPMOVUSDB : avx512_trunc_db<0x11, "vpmovusdb", X86vtruncus,
                                 select_truncus, SchedWriteVecTruncate,
                                 truncstore_us_vi8, masked_truncstore_us_vi8,
                                 X86vtruncus, X86vmtruncus>;

defm VPMOVDW : avx512_trunc_dw<0x33, "vpmovdw", trunc, select_trunc,
                               SchedWriteVecTruncate, truncstorevi16,
                               masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
defm VPMOVSDW : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, select_truncs,
                                SchedWriteVecTruncate, truncstore_s_vi16,
                                masked_truncstore_s_vi16, X86vtruncs,
                                X86vmtruncs>;
defm VPMOVUSDW : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus,
                                 select_truncus, SchedWriteVecTruncate,
                                 truncstore_us_vi16, masked_truncstore_us_vi16,
                                 X86vtruncus, X86vmtruncus>;

defm VPMOVWB : avx512_trunc_wb<0x30, "vpmovwb", trunc, select_trunc,
                               SchedWriteVecTruncate, truncstorevi8,
                               masked_truncstorevi8, X86vtrunc,
                               X86vmtrunc>;
defm VPMOVSWB : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, select_truncs,
                                SchedWriteVecTruncate, truncstore_s_vi8,
                                masked_truncstore_s_vi8, X86vtruncs,
                                X86vmtruncs>;
defm VPMOVUSWB : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus,
                                 select_truncus, SchedWriteVecTruncate,
                                 truncstore_us_vi8, masked_truncstore_us_vi8,
                                 X86vtruncus, X86vmtruncus>;

let Predicates = [HasAVX512, NoVLX] in {
def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))),
         (v8i16 (EXTRACT_SUBREG
                 (v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                              VR256X:$src, sub_ymm)))), sub_xmm))>;
def: Pat<(v4i32 (trunc (v4i64 VR256X:$src))),
         (v4i32 (EXTRACT_SUBREG
                 (v8i32 (VPMOVQDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                            VR256X:$src, sub_ymm)))), sub_xmm))>;
}

let Predicates = [HasBWI, NoVLX] in {
def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
         (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF),
                                             VR256X:$src, sub_ymm))), sub_xmm))>;
}

// Without BWI we can't use vXi16/vXi8 vselect so we have to use vmtrunc nodes.
multiclass mtrunc_lowering<string InstrName, SDNode OpNode,
                           X86VectorVTInfo DestInfo,
                           X86VectorVTInfo SrcInfo> {
  def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
                                 DestInfo.RC:$src0,
                                 SrcInfo.KRCWM:$mask)),
            (!cast<Instruction>(InstrName#"rrk") DestInfo.RC:$src0,
                                                 SrcInfo.KRCWM:$mask,
                                                 SrcInfo.RC:$src)>;

  def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
                                 DestInfo.ImmAllZerosV,
                                 SrcInfo.KRCWM:$mask)),
            (!cast<Instruction>(InstrName#"rrkz") SrcInfo.KRCWM:$mask,
                                                  SrcInfo.RC:$src)>;
}

let Predicates = [HasVLX] in {
defm : mtrunc_lowering<"VPMOVDWZ256", X86vmtrunc, v8i16x_info, v8i32x_info>;
defm : mtrunc_lowering<"VPMOVSDWZ256", X86vmtruncs, v8i16x_info, v8i32x_info>;
defm : mtrunc_lowering<"VPMOVUSDWZ256", X86vmtruncus, v8i16x_info, v8i32x_info>;
}

let Predicates = [HasAVX512] in {
defm : mtrunc_lowering<"VPMOVDWZ", X86vmtrunc, v16i16x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVSDWZ", X86vmtruncs, v16i16x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVUSDWZ", X86vmtruncus, v16i16x_info, v16i32_info>;

defm : mtrunc_lowering<"VPMOVDBZ", X86vmtrunc, v16i8x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVSDBZ", X86vmtruncs, v16i8x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVUSDBZ", X86vmtruncus, v16i8x_info, v16i32_info>;

defm : mtrunc_lowering<"VPMOVQWZ", X86vmtrunc, v8i16x_info, v8i64_info>;
defm : mtrunc_lowering<"VPMOVSQWZ", X86vmtruncs, v8i16x_info, v8i64_info>;
defm : mtrunc_lowering<"VPMOVUSQWZ", X86vmtruncus, v8i16x_info, v8i64_info>;
}
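
// For example (illustrative), a masked narrowing such as
//   _mm512_mask_cvtepi32_epi16(src, k, a)
// selects through X86vmtrunc to "vpmovdw %zmm1, %ymm0 {%k1}".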

multiclass avx512_pmovx_common<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
                               X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
                               X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode> {
  let ExeDomain = DestInfo.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
                    (ins SrcInfo.RC:$src), OpcodeStr, "$src", "$src",
                    (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src)))>,
                    EVEX, Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
                  (ins x86memop:$src), OpcodeStr, "$src", "$src",
                  (DestInfo.VT (LdFrag addr:$src))>,
                  EVEX, Sched<[sched.Folded]>;
  }
}

multiclass avx512_pmovx_bw<bits<8> opc, string OpcodeStr,
                           SDNode OpNode, SDNode InVecNode, string ExtTy,
                           X86SchedWriteWidths sched,
                           PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasBWI] in {
    defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v8i16x_info,
                                   v16i8x_info, i64mem, LdFrag, InVecNode>,
                                   EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v16i16x_info,
                                   v16i8x_info, i128mem, LdFrag, OpNode>,
                                   EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasBWI] in {
    defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v32i16_info,
                                 v32i8x_info, i256mem, LdFrag, OpNode>,
                                 EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
  }
}

multiclass avx512_pmovx_bd<bits<8> opc, string OpcodeStr,
                           SDNode OpNode, SDNode InVecNode, string ExtTy,
                           X86SchedWriteWidths sched,
                           PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v4i32x_info,
                                   v16i8x_info, i32mem, LdFrag, InVecNode>,
                                   EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v8i32x_info,
                                   v16i8x_info, i64mem, LdFrag, InVecNode>,
                                   EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v16i32_info,
                                 v16i8x_info, i128mem, LdFrag, OpNode>,
                                 EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
  }
}

multiclass avx512_pmovx_bq<bits<8> opc, string OpcodeStr,
                           SDNode InVecNode, string ExtTy,
                           X86SchedWriteWidths sched,
                           PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v2i64x_info,
                                   v16i8x_info, i16mem, LdFrag, InVecNode>,
                                   EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v4i64x_info,
                                   v16i8x_info, i32mem, LdFrag, InVecNode>,
                                   EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v8i64_info,
                                 v16i8x_info, i64mem, LdFrag, InVecNode>,
                                 EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512, VEX_WIG;
  }
}

multiclass avx512_pmovx_wd<bits<8> opc, string OpcodeStr,
                           SDNode OpNode, SDNode InVecNode, string ExtTy,
                           X86SchedWriteWidths sched,
                           PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v4i32x_info,
                                   v8i16x_info, i64mem, LdFrag, InVecNode>,
                                   EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v8i32x_info,
                                   v8i16x_info, i128mem, LdFrag, OpNode>,
                                   EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v16i32_info,
                                 v16i16x_info, i256mem, LdFrag, OpNode>,
                                 EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
  }
}

multiclass avx512_pmovx_wq<bits<8> opc, string OpcodeStr,
                           SDNode OpNode, SDNode InVecNode, string ExtTy,
                           X86SchedWriteWidths sched,
                           PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v2i64x_info,
                                   v8i16x_info, i32mem, LdFrag, InVecNode>,
                                   EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;
    defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v4i64x_info,
                                   v8i16x_info, i64mem, LdFrag, InVecNode>,
                                   EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v8i64_info,
                                 v8i16x_info, i128mem, LdFrag, OpNode>,
                                 EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
  }
}

multiclass avx512_pmovx_dq<bits<8> opc, string OpcodeStr,
                           SDNode OpNode, SDNode InVecNode, string ExtTy,
                           X86SchedWriteWidths sched,
                           PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {

  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v2i64x_info,
                                   v4i32x_info, i64mem, LdFrag, InVecNode>,
                                   EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128;

    defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v4i64x_info,
                                   v4i32x_info, i128mem, LdFrag, OpNode>,
                                   EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256;
  }
  let Predicates = [HasAVX512] in {
    defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v8i64_info,
                                 v8i32x_info, i256mem, LdFrag, OpNode>,
                                 EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512;
  }
}

defm VPMOVZXBW : avx512_pmovx_bw<0x30, "vpmovzxbw", zext, zext_invec, "z", SchedWriteVecExtend>;
defm VPMOVZXBD : avx512_pmovx_bd<0x31, "vpmovzxbd", zext, zext_invec, "z", SchedWriteVecExtend>;
defm VPMOVZXBQ : avx512_pmovx_bq<0x32, "vpmovzxbq", zext_invec, "z", SchedWriteVecExtend>;
defm VPMOVZXWD : avx512_pmovx_wd<0x33, "vpmovzxwd", zext, zext_invec, "z", SchedWriteVecExtend>;
defm VPMOVZXWQ : avx512_pmovx_wq<0x34, "vpmovzxwq", zext, zext_invec, "z", SchedWriteVecExtend>;
defm VPMOVZXDQ : avx512_pmovx_dq<0x35, "vpmovzxdq", zext, zext_invec, "z", SchedWriteVecExtend>;

defm VPMOVSXBW: avx512_pmovx_bw<0x20, "vpmovsxbw", sext, sext_invec, "s", SchedWriteVecExtend>;
defm VPMOVSXBD: avx512_pmovx_bd<0x21, "vpmovsxbd", sext, sext_invec, "s", SchedWriteVecExtend>;
defm VPMOVSXBQ: avx512_pmovx_bq<0x22, "vpmovsxbq", sext_invec, "s", SchedWriteVecExtend>;
defm VPMOVSXWD: avx512_pmovx_wd<0x23, "vpmovsxwd", sext, sext_invec, "s", SchedWriteVecExtend>;
defm VPMOVSXWQ: avx512_pmovx_wq<0x24, "vpmovsxwq", sext, sext_invec, "s", SchedWriteVecExtend>;
defm VPMOVSXDQ: avx512_pmovx_dq<0x25, "vpmovsxdq", sext, sext_invec, "s", SchedWriteVecExtend>;


// Patterns for which we also need any-extend versions; aext_vector_inreg
// is currently legalized to zext_vector_inreg.
multiclass AVX512_pmovx_patterns_base<string OpcPrefix, SDNode ExtOp> {
  // 256-bit patterns
  let Predicates = [HasVLX, HasBWI] in {
  def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))),
            (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
  }

  let Predicates = [HasVLX] in {
  def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))),
            (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;

  def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
            (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
  }

  // 512-bit patterns
  let Predicates = [HasBWI] in {
  def : Pat<(v32i16 (ExtOp (loadv32i8 addr:$src))),
            (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
  }
  let Predicates = [HasAVX512] in {
  def : Pat<(v16i32 (ExtOp (loadv16i8 addr:$src))),
            (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;
  def : Pat<(v16i32 (ExtOp (loadv16i16 addr:$src))),
            (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;

  def : Pat<(v8i64 (ExtOp (loadv8i16 addr:$src))),
            (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;

  def : Pat<(v8i64 (ExtOp (loadv8i32 addr:$src))),
            (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
  }
}

multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
                                 SDNode InVecOp> :
    AVX512_pmovx_patterns_base<OpcPrefix, ExtOp> {
  // 128-bit patterns
  let Predicates = [HasVLX, HasBWI] in {
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  }
  let Predicates = [HasVLX] in {
  def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;

  def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;

  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;

  def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (X86vzload32 addr:$src))))),
            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;

  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  }
  let Predicates = [HasVLX] in {
  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;

  def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;

  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  }
  // 512-bit patterns
  let Predicates = [HasAVX512] in {
  def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
  def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
  def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
  }
}

defm : AVX512_pmovx_patterns<"VPMOVSX", sext, sext_invec>;
defm : AVX512_pmovx_patterns<"VPMOVZX", zext, zext_invec>;

// Without BWI we can't do a trunc from v16i16 to v16i8. DAG combine can merge
// ext+trunc aggressively, making it impossible to legalize the DAG to this
// pattern directly.
let Predicates = [HasAVX512, NoBWI] in {
def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
         (VPMOVDBZrr (v16i32 (VPMOVZXWDZrr VR256X:$src)))>;
def: Pat<(v16i8 (trunc (loadv16i16 addr:$src))),
         (VPMOVDBZrr (v16i32 (VPMOVZXWDZrm addr:$src)))>;
}
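
// With AVX512F only, the two patterns above emit, e.g. (illustrative):
//   vpmovzxwd %ymm0, %zmm0      # widen v16i16 -> v16i32
//   vpmovdb   %zmm0, %xmm0      # narrow v16i32 -> v16i8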

//===----------------------------------------------------------------------===//
// GATHER - SCATTER Operations
//===----------------------------------------------------------------------===//

// FIXME: Improve scheduling of gather/scatter instructions.
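
// A gather reads elements through a vector of indices under a writemask and
// clears mask bits as elements complete, e.g. (AT&T syntax, illustrative):
//   vgatherdps (%rdi,%zmm2,4), %zmm1 {%k1}   # %k1 is consumed (zero on completion)
// which is why the mask operand below is also tied to a $mask_wb output.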
multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> {
  let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
      ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
  def rm  : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, MaskRC:$mask_wb),
            (ins _.RC:$src1, MaskRC:$mask, memop:$src2),
            !strconcat(OpcodeStr#_.Suffix,
            "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
            []>, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
            Sched<[WriteLoad, WriteVecMaskedGatherWriteback]>;
}

multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
                              AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME#D#SUFF#Z: avx512_gather<dopc, OpcodeStr#"d", _.info512,
                                    vy512xmem>, EVEX_V512, VEX_W;
  defm NAME#Q#SUFF#Z: avx512_gather<qopc, OpcodeStr#"q", _.info512,
                                    vz512mem>, EVEX_V512, VEX_W;
let Predicates = [HasVLX] in {
  defm NAME#D#SUFF#Z256: avx512_gather<dopc, OpcodeStr#"d", _.info256,
                                       vx256xmem>, EVEX_V256, VEX_W;
  defm NAME#Q#SUFF#Z256: avx512_gather<qopc, OpcodeStr#"q", _.info256,
                                       vy256xmem>, EVEX_V256, VEX_W;
  defm NAME#D#SUFF#Z128: avx512_gather<dopc, OpcodeStr#"d", _.info128,
                                       vx128xmem>, EVEX_V128, VEX_W;
  defm NAME#Q#SUFF#Z128: avx512_gather<qopc, OpcodeStr#"q", _.info128,
                                       vx128xmem>, EVEX_V128, VEX_W;
}
}

multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
                              AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME#D#SUFF#Z: avx512_gather<dopc, OpcodeStr#"d", _.info512, vz512mem>,
                                    EVEX_V512;
  defm NAME#Q#SUFF#Z: avx512_gather<qopc, OpcodeStr#"q", _.info256, vz256mem>,
                                    EVEX_V512;
let Predicates = [HasVLX] in {
  defm NAME#D#SUFF#Z256: avx512_gather<dopc, OpcodeStr#"d", _.info256,
                                       vy256xmem>, EVEX_V256;
  defm NAME#Q#SUFF#Z256: avx512_gather<qopc, OpcodeStr#"q", _.info128,
                                       vy128xmem>, EVEX_V256;
  defm NAME#D#SUFF#Z128: avx512_gather<dopc, OpcodeStr#"d", _.info128,
                                       vx128xmem>, EVEX_V128;
  defm NAME#Q#SUFF#Z128: avx512_gather<qopc, OpcodeStr#"q", _.info128,
                                       vx64xmem, VK2WM>, EVEX_V128;
}
}

defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">,
               avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">;

defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">,
                avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;

multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                          X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> {

let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain,
    hasSideEffects = 0 in

  def mr  : AVX5128I<opc, MRMDestMem, (outs MaskRC:$mask_wb),
            (ins memop:$dst, MaskRC:$mask, _.RC:$src),
            !strconcat(OpcodeStr#_.Suffix,
            "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
            []>, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
            Sched<[WriteStore]>;
}

multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
                               AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME#D#SUFF#Z: avx512_scatter<dopc, OpcodeStr#"d", _.info512,
                                     vy512xmem>, EVEX_V512, VEX_W;
  defm NAME#Q#SUFF#Z: avx512_scatter<qopc, OpcodeStr#"q", _.info512,
                                     vz512mem>, EVEX_V512, VEX_W;
let Predicates = [HasVLX] in {
  defm NAME#D#SUFF#Z256: avx512_scatter<dopc, OpcodeStr#"d", _.info256,
                                        vx256xmem>, EVEX_V256, VEX_W;
  defm NAME#Q#SUFF#Z256: avx512_scatter<qopc, OpcodeStr#"q", _.info256,
                                        vy256xmem>, EVEX_V256, VEX_W;
  defm NAME#D#SUFF#Z128: avx512_scatter<dopc, OpcodeStr#"d", _.info128,
                                        vx128xmem>, EVEX_V128, VEX_W;
  defm NAME#Q#SUFF#Z128: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
                                        vx128xmem>, EVEX_V128, VEX_W;
}
}

multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
                               AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME#D#SUFF#Z: avx512_scatter<dopc, OpcodeStr#"d", _.info512, vz512mem>,
                                     EVEX_V512;
  defm NAME#Q#SUFF#Z: avx512_scatter<qopc, OpcodeStr#"q", _.info256, vz256mem>,
                                     EVEX_V512;
let Predicates = [HasVLX] in {
  defm NAME#D#SUFF#Z256: avx512_scatter<dopc, OpcodeStr#"d", _.info256,
                                        vy256xmem>, EVEX_V256;
  defm NAME#Q#SUFF#Z256: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
                                        vy128xmem>, EVEX_V256;
  defm NAME#D#SUFF#Z128: avx512_scatter<dopc, OpcodeStr#"d", _.info128,
                                        vx128xmem>, EVEX_V128;
  defm NAME#Q#SUFF#Z128: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
                                        vx64xmem, VK2WM>, EVEX_V128;
}
}

defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">,
                avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">;

defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">,
                 avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">;

// prefetch
multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
                                          RegisterClass KRC, X86MemOperand memop> {
  let Predicates = [HasPFI], mayLoad = 1, mayStore = 1 in
  def m  : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
               !strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"), []>,
               EVEX, EVEX_K, Sched<[WriteLoad]>;
}

defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
                    VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
                    VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
                    VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
                    VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
                    VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
                    VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
                    VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
                    VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
                     VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
                     VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
                     VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
                     VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr, SchedWrite Sched> {
def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
                  !strconcat(OpcodeStr#Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
                  [(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))]>,
                  EVEX, Sched<[Sched]>;
}

multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
                                 string OpcodeStr, Predicate prd> {
let Predicates = [prd] in
  defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr, WriteVecMoveZ>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr, WriteVecMoveY>, EVEX_V256;
    defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr, WriteVecMoveX>, EVEX_V128;
  }
}

defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2", HasBWI>;
defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI>, VEX_W;
defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>;
defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI>, VEX_W;

multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr> {
  def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                      [(set _.KRC:$dst, (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src)))]>,
                      EVEX, Sched<[WriteMove]>;
}

// Use the 512-bit version to implement 128/256-bit in case NoVLX.
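// E.g. a v16i8 compare-to-mask without VLX inserts the 128-bit source into an
// undef 512-bit register, uses the Zrr form, and then copies the resulting
// mask back to the narrower mask register class (see the lowering below).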
multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo,
                                           X86VectorVTInfo _,
                                           string Name> {

  def : Pat<(_.KVT (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src))),
            (_.KVT (COPY_TO_REGCLASS
                     (!cast<Instruction>(Name#"Zrr")
                       (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                                      _.RC:$src, _.SubRegIdx)),
                     _.KRC))>;
}

multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
                                         AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
    defm Z : convert_vector_to_mask_common<opc, VTInfo.info512, OpcodeStr>,
             EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256, OpcodeStr>,
                EVEX_V256;
    defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,
                EVEX_V128;
  }
  let Predicates = [prd, NoVLX] in {
    defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info256, NAME>;
    defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info128, NAME>;
  }
}

defm VPMOVB2M : avx512_convert_vector_to_mask<0x29, "vpmovb2m",
                                              avx512vl_i8_info, HasBWI>;
defm VPMOVW2M : avx512_convert_vector_to_mask<0x29, "vpmovw2m",
                                              avx512vl_i16_info, HasBWI>, VEX_W;
defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m",
                                              avx512vl_i32_info, HasDQI>;
defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
                                              avx512vl_i64_info, HasDQI>, VEX_W;

// Patterns for handling sext from a mask register to v16i8/v16i16 when DQI
// is available, but BWI is not. We can't handle this in lowering because
// a target independent DAG combine likes to combine sext and trunc.
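// For example, (v16i8 (sext (v16i1 k))) becomes a mask-to-dword move followed
// by a truncating down-convert:
//   vpmovm2d %k1, %zmm0
//   vpmovdb  %zmm0, %xmm0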
let Predicates = [HasDQI, NoBWI] in {
  def : Pat<(v16i8 (sext (v16i1 VK16:$src))),
            (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
  def : Pat<(v16i16 (sext (v16i1 VK16:$src))),
            (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
}

let Predicates = [HasDQI, NoBWI, HasVLX] in {
  def : Pat<(v8i16 (sext (v8i1 VK8:$src))),
            (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - COMPRESS and EXPAND
//

multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
                                        string OpcodeStr, X86FoldableSchedWrite sched> {
  defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
              (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
              (null_frag)>, AVX5128IBase,
              Sched<[sched]>;

  let mayStore = 1, hasSideEffects = 0 in
  def mr : AVX5128I<opc, MRMDestMem, (outs),
              (ins _.MemOp:$dst, _.RC:$src),
              OpcodeStr # "\t{$src, $dst|$dst, $src}",
              []>, EVEX_CD8<_.EltSize, CD8VT1>,
              Sched<[sched.Folded]>;

  def mrk : AVX5128I<opc, MRMDestMem, (outs),
              (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
              OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
              []>,
              EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
              Sched<[sched.Folded]>;
}

multiclass compress_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
  def : Pat<(X86mCompressingStore (_.VT _.RC:$src), addr:$dst, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#mrk)
                            addr:$dst, _.KRCWM:$mask, _.RC:$src)>;

  def : Pat<(X86compress (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#rrk)
                            _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
  def : Pat<(X86compress (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#rrkz)
                            _.KRCWM:$mask, _.RC:$src)>;
}

multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched,
                                 AVX512VLVectorVTInfo VTInfo,
                                 Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in
  defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr, sched>,
           compress_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [Pred, HasVLX] in {
    defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr, sched>,
                compress_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr, sched>,
                compress_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
  }
}

// FIXME: Is there a better scheduler class for VPCOMPRESS?
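// For reference, compress packs the mask-selected source elements into the
// low part of the destination, e.g. with a mask whose low bits are 0b0101
// (elements 0 and 2 selected):
//   vpcompressd %zmm1, %zmm0 {%k1}   # dst[0] = src[0], dst[1] = src[2], ...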
defm VPCOMPRESSD : compress_by_elt_width<0x8B, "vpcompressd", WriteVarShuffle256,
                                         avx512vl_i32_info>, EVEX, NotMemoryFoldable;
defm VPCOMPRESSQ : compress_by_elt_width<0x8B, "vpcompressq", WriteVarShuffle256,
                                         avx512vl_i64_info>, EVEX, VEX_W, NotMemoryFoldable;
defm VCOMPRESSPS : compress_by_elt_width<0x8A, "vcompressps", WriteVarShuffle256,
                                         avx512vl_f32_info>, EVEX, NotMemoryFoldable;
defm VCOMPRESSPD : compress_by_elt_width<0x8A, "vcompresspd", WriteVarShuffle256,
                                         avx512vl_f64_info>, EVEX, VEX_W, NotMemoryFoldable;

// expand
multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
                               string OpcodeStr, X86FoldableSchedWrite sched> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
              (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
              (null_frag)>, AVX5128IBase,
              Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
              (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1",
              (null_frag)>,
            AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>,
            Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass expand_by_vec_width_lowering<X86VectorVTInfo _, string Name> {

  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)),
            (!cast<Instruction>(Name#_.ZSuffix#rmkz)
                            _.KRCWM:$mask, addr:$src)>;

  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmkz)
                            _.KRCWM:$mask, addr:$src)>;

  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask,
                                     (_.VT _.RC:$src0))),
            (!cast<Instruction>(Name#_.ZSuffix#rmk)
                            _.RC:$src0, _.KRCWM:$mask, addr:$src)>;

  def : Pat<(X86expand (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#rrk)
                            _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
  def : Pat<(X86expand (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#rrkz)
                            _.KRCWM:$mask, _.RC:$src)>;
}

multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
                               X86FoldableSchedWrite sched,
                               AVX512VLVectorVTInfo VTInfo,
                               Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in
  defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr, sched>,
           expand_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [Pred, HasVLX] in {
    defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr, sched>,
                expand_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr, sched>,
                expand_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
  }
}

// FIXME: Is there a better scheduler class for VPEXPAND?
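// Expand is the inverse of compress: it reads elements contiguously and
// scatters them to the mask-selected positions, e.g. with low mask bits 0b0101:
//   vpexpandd %zmm1, %zmm0 {%k1}     # dst[0] = src[0], dst[2] = src[1], ...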
defm VPEXPANDD : expand_by_elt_width<0x89, "vpexpandd", WriteVarShuffle256,
                                     avx512vl_i32_info>, EVEX;
defm VPEXPANDQ : expand_by_elt_width<0x89, "vpexpandq", WriteVarShuffle256,
                                     avx512vl_i64_info>, EVEX, VEX_W;
defm VEXPANDPS : expand_by_elt_width<0x88, "vexpandps", WriteVarShuffle256,
                                     avx512vl_f32_info>, EVEX;
defm VEXPANDPD : expand_by_elt_width<0x88, "vexpandpd", WriteVarShuffle256,
                                     avx512vl_f64_info>, EVEX, VEX_W;

// Handle instruction reg_vec1 = op(reg_vec, imm)
//                               op(mem_vec, imm)
//                               op(broadcast(eltVt), imm)
// All instructions are created with FROUND_CURRENT.
multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr,
                                      SDPatternOperator OpNode,
                                      SDPatternOperator MaskOpNode,
                                      X86FoldableSchedWrite sched,
                                      X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
    defm rri : AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2",
                      (OpNode (_.VT _.RC:$src1), (i32 timm:$src2)),
                      (MaskOpNode (_.VT _.RC:$src1), (i32 timm:$src2))>,
                      Sched<[sched]>;
    defm rmi : AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.MemOp:$src1, i32u8imm:$src2),
                    OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2",
                    (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
                            (i32 timm:$src2)),
                    (MaskOpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
                                (i32 timm:$src2))>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
    defm rmbi : AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
                    OpcodeStr#_.Suffix, "$src2, ${src1}"#_.BroadcastStr,
                    "${src1}"#_.BroadcastStr#", $src2",
                    (OpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
                            (i32 timm:$src2)),
                    (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
                                (i32 timm:$src2))>, EVEX_B,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Handle instruction reg_vec1 = op(reg_vec2, imm), {sae}
multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
                                          SDNode OpNode, X86FoldableSchedWrite sched,
                                          X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr#_.Suffix, "$src2, {sae}, $src1",
                      "$src1, {sae}, $src2",
                      (OpNode (_.VT _.RC:$src1),
                              (i32 timm:$src2))>,
                      EVEX_B, Sched<[sched]>;
}

multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
            AVX512VLVectorVTInfo _, bits<8> opc, SDPatternOperator OpNode,
            SDPatternOperator MaskOpNode, SDNode OpNodeSAE, X86SchedWriteWidths sched,
            Predicate prd> {
  let Predicates = [prd] in {
    defm Z    : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
                                           sched.ZMM, _.info512>,
                avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE,
                                               sched.ZMM, _.info512>, EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
                                           sched.XMM, _.info128>, EVEX_V128;
    defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
                                           sched.YMM, _.info256>, EVEX_V256;
  }
}

// Handle instruction reg_vec1 = op(reg_vec2, reg_vec3, imm)
//                               op(reg_vec2, mem_vec, imm)
//                               op(reg_vec2, broadcast(eltVt), imm)
// All instructions are created with FROUND_CURRENT.
multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
    defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 timm:$src3))>,
                      Sched<[sched]>;
    defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
                    OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT (bitconvert (_.LdFrag addr:$src2))),
                            (i32 timm:$src3))>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
    defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
                    OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
                    "$src1, ${src2}"#_.BroadcastStr#", $src3",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT (_.BroadcastLdFrag addr:$src2)),
                            (i32 timm:$src3))>, EVEX_B,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Handle instruction reg_vec1 = op(reg_vec2, reg_vec3, imm)
//                               op(reg_vec2, mem_vec, imm)
multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo DestInfo,
                              X86VectorVTInfo SrcInfo> {
  let ExeDomain = DestInfo.ExeDomain in {
    defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
                  (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
                                       (SrcInfo.VT SrcInfo.RC:$src2),
                                       (i8 timm:$src3)))>,
                  Sched<[sched]>;
    defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
                (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
                                     (SrcInfo.VT (bitconvert
                                                  (SrcInfo.LdFrag addr:$src2))),
                                     (i8 timm:$src3)))>,
                Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Handle instruction reg_vec1 = op(reg_vec2, reg_vec3, imm)
//                               op(reg_vec2, mem_vec, imm)
//                               op(reg_vec2, broadcast(eltVt), imm)
multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, X86VectorVTInfo _> :
  avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, sched, _, _> {

  let ExeDomain = _.ExeDomain in
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
                "$src1, ${src2}"#_.BroadcastStr#", $src3",
                (OpNode (_.VT _.RC:$src1),
                        (_.VT (_.BroadcastLdFrag addr:$src2)),
                        (i8 timm:$src3))>, EVEX_B,
                Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// Handle scalar instruction reg_vec1 = op(reg_vec2, reg_vec3, imm)
//                                      op(reg_vec2, mem_scalar, imm)
multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
    defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 timm:$src3))>,
                      Sched<[sched]>;
    defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
                    OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                    (OpNode (_.VT _.RC:$src1),
                            (_.ScalarIntMemFrags addr:$src2),
                            (i32 timm:$src3))>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Handle instruction reg_vec1 = op(reg_vec2, reg_vec3, imm), {sae}
multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode, X86FoldableSchedWrite sched,
                                    X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, {sae}, $src2, $src1",
                      "$src1, $src2, {sae}, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 timm:$src3))>,
                      EVEX_B, Sched<[sched]>;
}

// Handle scalar instruction reg_vec1 = op(reg_vec2, reg_vec3, imm), {sae}
multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                    X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
  defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, {sae}, $src2, $src1",
                      "$src1, $src2, {sae}, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 timm:$src3))>,
                      EVEX_B, Sched<[sched]>;
}

multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
            AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
            SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd> {
  let Predicates = [prd] in {
    defm Z    : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
                avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE, sched.ZMM, _.info512>,
                EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
                EVEX_V128;
    defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
                EVEX_V256;
  }
}

multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
                   X86SchedWriteWidths sched, AVX512VLVectorVTInfo DestInfo,
                   AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> {
  let Predicates = [Pred] in {
    defm Z : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.ZMM, DestInfo.info512,
                                SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX_4V;
  }
  let Predicates = [Pred, HasVLX] in {
    defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.XMM, DestInfo.info128,
                                   SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX_4V;
    defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.YMM, DestInfo.info256,
                                   SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX_4V;
  }
}

multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
                                  bits<8> opc, SDNode OpNode, X86SchedWriteWidths sched,
                                  Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in {
    defm Z : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
             EVEX_V512;
  }
  let Predicates = [Pred, HasVLX] in {
    defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
                EVEX_V128;
    defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
                EVEX_V256;
  }
}

multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
                  X86VectorVTInfo _, bits<8> opc, SDNode OpNode,
                  SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, sched.XMM, _>,
             avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeSAE, sched.XMM, _>;
  }
}

multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
                    bits<8> opcPs, bits<8> opcPd, SDPatternOperator OpNode,
                    SDPatternOperator MaskOpNode, SDNode OpNodeSAE,
                    X86SchedWriteWidths sched, Predicate prd> {
  defm PH : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f16_info,
                      opcPs, OpNode, MaskOpNode, OpNodeSAE, sched, HasFP16>,
                      AVX512PSIi8Base, TA, EVEX, EVEX_CD8<16, CD8VF>;
  defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info,
                      opcPs, OpNode, MaskOpNode, OpNodeSAE, sched, prd>,
                      AVX512AIi8Base, EVEX, EVEX_CD8<32, CD8VF>;
  defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info,
                      opcPd, OpNode, MaskOpNode, OpNodeSAE, sched, prd>,
                      AVX512AIi8Base, EVEX, EVEX_CD8<64, CD8VF>, VEX_W;
}

defm VREDUCE   : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
                            X86VReduce, X86VReduce, X86VReduceSAE,
                            SchedWriteFRnd, HasDQI>;
defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
                            X86any_VRndScale, X86VRndScale, X86VRndScaleSAE,
                            SchedWriteFRnd, HasAVX512>;
defm VGETMANT  : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
                            X86VGetMant, X86VGetMant, X86VGetMantSAE,
                            SchedWriteFRnd, HasAVX512>;

defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
                                                0x50, X86VRange, X86VRangeSAE,
                                                SchedWriteFAdd, HasDQI>,
                AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
                                                0x50, X86VRange, X86VRangeSAE,
                                                SchedWriteFAdd, HasDQI>,
                AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;

defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd", f64x_info,
                   0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
               AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
                   0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
               AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;

defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
                    0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
                AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
                    0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
                AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
defm VREDUCESH: avx512_common_fp_sae_scalar_imm<"vreducesh", f16x_info,
                    0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasFP16>,
                AVX512PSIi8Base, TA, VEX_LIG, EVEX_4V, EVEX_CD8<16, CD8VT1>;
defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
                     0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
                 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
                     0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
                 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
defm VGETMANTSH: avx512_common_fp_sae_scalar_imm<"vgetmantsh", f16x_info,
                     0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasFP16>,
                 AVX512PSIi8Base, TA, VEX_LIG, EVEX_4V, EVEX_CD8<16, CD8VT1>;

multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
                                          X86FoldableSchedWrite sched,
                                          X86VectorVTInfo _,
                                          X86VectorVTInfo CastInfo,
                                          string EVEX2VEXOvrd> {
  let ExeDomain = _.ExeDomain in {
    defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (_.VT (bitconvert
                         (CastInfo.VT (X86Shuf128 _.RC:$src1, _.RC:$src2,
                                                  (i8 timm:$src3)))))>,
                  Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
    defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (_.VT
                   (bitconvert
                    (CastInfo.VT (X86Shuf128 _.RC:$src1,
                                             (CastInfo.LdFrag addr:$src2),
                                             (i8 timm:$src3)))))>,
                  Sched<[sched.Folded, sched.ReadAfterFold]>,
                  EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
    defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
                  "$src1, ${src2}"#_.BroadcastStr#", $src3",
                  (_.VT
                   (bitconvert
                    (CastInfo.VT
                     (X86Shuf128 _.RC:$src1,
                                 (_.BroadcastLdFrag addr:$src2),
                                 (i8 timm:$src3)))))>, EVEX_B,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_shuff_packed_128<string OpcodeStr, X86FoldableSchedWrite sched,
                                   AVX512VLVectorVTInfo _,
                                   AVX512VLVectorVTInfo CastInfo, bits<8> opc,
                                   string EVEX2VEXOvrd> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
                                          _.info512, CastInfo.info512, "">, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in
  defm Z256 : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
                                             _.info256, CastInfo.info256,
                                             EVEX2VEXOvrd>, EVEX_V256;
}

defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", WriteFShuffle256,
      avx512vl_f32_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", WriteFShuffle256,
      avx512vl_f64_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", WriteFShuffle256,
      avx512vl_i32_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", WriteFShuffle256,
      avx512vl_i64_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;

multiclass avx512_valign<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  // NOTE: EVEX2VEXOverride changed back to Unset for 256-bit at the
  // instantiation of this class.
  let ExeDomain = _.ExeDomain in {
    defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (_.VT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 timm:$src3)))>,
                  Sched<[sched]>, EVEX2VEXOverride<"VPALIGNRrri">;
    defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (_.VT (X86VAlign _.RC:$src1,
                                   (bitconvert (_.LdFrag addr:$src2)),
                                   (i8 timm:$src3)))>,
                  Sched<[sched.Folded, sched.ReadAfterFold]>,
                  EVEX2VEXOverride<"VPALIGNRrmi">;

    defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
                  "$src1, ${src2}"#_.BroadcastStr#", $src3",
                  (X86VAlign _.RC:$src1,
                             (_.VT (_.BroadcastLdFrag addr:$src2)),
                             (i8 timm:$src3))>, EVEX_B,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_valign_common<string OpcodeStr, X86SchedWriteWidths sched,
                                AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z    : avx512_valign<0x03, OpcodeStr, sched.ZMM, _.info512>,
                AVX512AIi8Base, EVEX_4V, EVEX_V512;
  }
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_valign<0x03, OpcodeStr, sched.XMM, _.info128>,
                AVX512AIi8Base, EVEX_4V, EVEX_V128;
    // We can't really override the 256-bit version so change it back to unset.
    let EVEX2VEXOverride = ? in
    defm Z256 : avx512_valign<0x03, OpcodeStr, sched.YMM, _.info256>,
                AVX512AIi8Base, EVEX_4V, EVEX_V256;
  }
}

defm VALIGND: avx512_valign_common<"valignd", SchedWriteShuffle,
                                   avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VALIGNQ: avx512_valign_common<"valignq", SchedWriteShuffle,
                                   avx512vl_i64_info>, EVEX_CD8<64, CD8VF>,
                                   VEX_W;

defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr",
                                         SchedWriteShuffle, avx512vl_i8_info,
                                         avx512vl_i8_info>, EVEX_CD8<8, CD8VF>;

// Fragments to help convert valignq into masked valignd. Or valignq/valignd
// into vpalignr.
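// The immediate has to be rescaled to the new element size: rotating v2i64 by
// N quadwords equals rotating v4i32 by 2*N dwords or v16i8 by 8*N bytes, and
// rotating v4i32 by N dwords equals rotating v16i8 by 4*N bytes; hence the
// *2, *8 and *4 transforms below.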
def ValignqImm32XForm : SDNodeXForm<timm, [{
  return getI8Imm(N->getZExtValue() * 2, SDLoc(N));
}]>;
def ValignqImm8XForm : SDNodeXForm<timm, [{
  return getI8Imm(N->getZExtValue() * 8, SDLoc(N));
}]>;
def ValigndImm8XForm : SDNodeXForm<timm, [{
  return getI8Imm(N->getZExtValue() * 4, SDLoc(N));
}]>;

multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
                                        X86VectorVTInfo From, X86VectorVTInfo To,
                                        SDNodeXForm ImmXForm> {
  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
                                 (bitconvert
                                  (From.VT (OpNode From.RC:$src1, From.RC:$src2,
                                                   timm:$src3))),
                                 To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask,
                                                  To.RC:$src1, To.RC:$src2,
                                                  (ImmXForm timm:$src3))>;

  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
                                 (bitconvert
                                  (From.VT (OpNode From.RC:$src1, From.RC:$src2,
                                                   timm:$src3))),
                                 To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask,
                                                   To.RC:$src1, To.RC:$src2,
                                                   (ImmXForm timm:$src3))>;

  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
                                 (bitconvert
                                  (From.VT (OpNode From.RC:$src1,
                                                   (From.LdFrag addr:$src2),
                                                   timm:$src3))),
                                 To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask,
                                                  To.RC:$src1, addr:$src2,
                                                  (ImmXForm timm:$src3))>;

  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
                                 (bitconvert
                                  (From.VT (OpNode From.RC:$src1,
                                                   (From.LdFrag addr:$src2),
                                                   timm:$src3))),
                                 To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask,
                                                   To.RC:$src1, addr:$src2,
                                                   (ImmXForm timm:$src3))>;
}

multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
                                           X86VectorVTInfo From,
                                           X86VectorVTInfo To,
                                           SDNodeXForm ImmXForm> :
  avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> {
  def : Pat<(From.VT (OpNode From.RC:$src1,
                             (bitconvert (To.VT (To.BroadcastLdFrag addr:$src2))),
                             timm:$src3)),
            (!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2,
                                                  (ImmXForm timm:$src3))>;

  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
                                 (bitconvert
                                  (From.VT (OpNode From.RC:$src1,
                                                   (bitconvert
                                                    (To.VT (To.BroadcastLdFrag addr:$src2))),
                                                   timm:$src3))),
                                 To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask,
                                                   To.RC:$src1, addr:$src2,
                                                   (ImmXForm timm:$src3))>;

  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
                                 (bitconvert
                                  (From.VT (OpNode From.RC:$src1,
                                                   (bitconvert
                                                    (To.VT (To.BroadcastLdFrag addr:$src2))),
                                                   timm:$src3))),
                                 To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask,
                                                    To.RC:$src1, addr:$src2,
                                                    (ImmXForm timm:$src3))>;
}

let Predicates = [HasAVX512] in {
  // For 512-bit we lower to the widest element type we can. So we only need
  // to handle converting valignq to valignd.
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ", X86VAlign, v8i64_info,
                                         v16i32_info, ValignqImm32XForm>;
}

let Predicates = [HasVLX] in {
  // For 128-bit we lower to the widest element type we can. So we only need
  // to handle converting valignq to valignd.
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ128", X86VAlign, v2i64x_info,
                                         v4i32x_info, ValignqImm32XForm>;
  // For 256-bit we lower to the widest element type we can. So we only need
  // to handle converting valignq to valignd.
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ256", X86VAlign, v4i64x_info,
                                         v8i32x_info, ValignqImm32XForm>;
}

let Predicates = [HasVLX, HasBWI] in {
  // We can turn 128 and 256 bit VALIGND/VALIGNQ into VPALIGNR.
  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v2i64x_info,
                                      v16i8x_info, ValignqImm8XForm>;
  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v4i32x_info,
                                      v16i8x_info, ValigndImm8XForm>;
}

defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw",
                SchedWritePSADBW, avx512vl_i16_info, avx512vl_i8_info>,
                EVEX_CD8<8, CD8VF>, NotEVEX2VEXConvertible;

multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                    (ins _.RC:$src1), OpcodeStr,
                    "$src1", "$src1",
                    (_.VT (OpNode (_.VT _.RC:$src1)))>, EVEX, AVX5128IBase,
                    Sched<[sched]>;

    defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.MemOp:$src1), OpcodeStr,
                  "$src1", "$src1",
                  (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1)))))>,
              EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>,
              Sched<[sched.Folded]>;
  }
}

multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _> :
            avx512_unary_rm<opc, OpcodeStr, OpNode, sched, _> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                 (ins _.ScalarMemOp:$src1), OpcodeStr,
                 "${src1}"#_.BroadcastStr,
                 "${src1}"#_.BroadcastStr,
                 (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src1))))>,
             EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[sched.Folded]>;
}

multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86SchedWriteWidths sched,
                              AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
           EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
                EVEX_V128;
  }
}

multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo,
                               Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
           EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
                EVEX_V128;
  }
}

multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd> {
  defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, sched,
                               avx512vl_i64_info, prd>, VEX_W;
  defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, sched,
                               avx512vl_i32_info, prd>;
}

multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd> {
  defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, sched,
                              avx512vl_i16_info, prd>, VEX_WIG;
  defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, sched,
                              avx512vl_i8_info, prd>, VEX_WIG;
}

multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
                                  bits<8> opc_d, bits<8> opc_q,
                                  string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched> {
  defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, sched,
                                    HasAVX512>,
              avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, sched,
                                    HasBWI>;
}

defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs,
                                    SchedWriteVecALU>;

// VPABS: Use the 512-bit version to implement 128/256-bit in case NoVLX.
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v4i64 (abs VR256X:$src)),
            (EXTRACT_SUBREG
                (VPABSQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
                sub_ymm)>;
  def : Pat<(v2i64 (abs VR128X:$src)),
            (EXTRACT_SUBREG
                (VPABSQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
                sub_xmm)>;
}

// Use the 512-bit version to implement 128/256-bit.
multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
                                 AVX512VLVectorVTInfo _, Predicate prd> {
  let Predicates = [prd, NoVLX] in {
    def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1))),
              (EXTRACT_SUBREG
                (!cast<Instruction>(InstrStr # "Zrr")
                  (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)),
                                 _.info256.RC:$src1,
                                 _.info256.SubRegIdx)),
                _.info256.SubRegIdx)>;

    def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1))),
              (EXTRACT_SUBREG
                (!cast<Instruction>(InstrStr # "Zrr")
                  (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)),
                                 _.info128.RC:$src1,
                                 _.info128.SubRegIdx)),
                _.info128.SubRegIdx)>;
  }
}

defm VPLZCNT : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz,
                                     SchedWriteVecIMul, HasCDI>;

// FIXME: Is there a better scheduler class for VPCONFLICT?
defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict,
                                        SchedWriteVecALU, HasCDI>;

// VPLZCNT: Use the 512-bit version to implement 128/256-bit in case NoVLX.
defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>;
defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>;

//===---------------------------------------------------------------------===//
// Counts number of ones - VPOPCNTD and VPOPCNTQ
//===---------------------------------------------------------------------===//

// FIXME: Is there a better scheduler class for VPOPCNTD/VPOPCNTQ?
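// Without VLX, the 128/256-bit forms reuse the widening trick above: e.g.
// (v4i64 (ctpop ymm)) is widened to a 512-bit VPOPCNTQ and the low 256 bits
// are extracted, via avx512_unary_lowering below.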
defm VPOPCNT : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop,
                                     SchedWriteVecALU, HasVPOPCNTDQ>;

defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info, HasVPOPCNTDQ>;
defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info, HasVPOPCNTDQ>;

//===---------------------------------------------------------------------===//
// Replicate Single FP - MOVSHDUP and MOVSLDUP
//===---------------------------------------------------------------------===//

multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86SchedWriteWidths sched> {
  defm NAME: avx512_unary_rm_vl<opc, OpcodeStr, OpNode, sched,
                                avx512vl_f32_info, HasAVX512>, XS;
}

defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup,
                                  SchedWriteFShuffle>;
defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup,
                                  SchedWriteFShuffle>;

//===----------------------------------------------------------------------===//
// AVX-512 - MOVDDUP
//===----------------------------------------------------------------------===//

multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr,
                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins _.RC:$src), OpcodeStr, "$src", "$src",
                   (_.VT (X86VBroadcast (_.VT _.RC:$src)))>, EVEX,
                   Sched<[sched]>;
    defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
                  (_.VT (_.BroadcastLdFrag addr:$src))>,
                  EVEX, EVEX_CD8<_.EltSize, CD8VH>,
                  Sched<[sched.Folded]>;
  }
}

multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr,
                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo> {
  defm Z : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.ZMM,
                           VTInfo.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.YMM,
                                VTInfo.info256>, EVEX_V256;
    defm Z128 : avx512_movddup_128<opc, OpcodeStr, sched.XMM,
                                   VTInfo.info128>, EVEX_V128;
  }
}

multiclass avx512_movddup<bits<8> opc, string OpcodeStr,
                          X86SchedWriteWidths sched> {
  defm NAME: avx512_movddup_common<opc, OpcodeStr, sched,
                                   avx512vl_f64_info>, XD, VEX_W;
}

defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", SchedWriteFShuffle>;

let Predicates = [HasVLX] in {
def : Pat<(v2f64 (X86VBroadcast f64:$src)),
          (VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;

def : Pat<(vselect_mask (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
                        (v2f64 VR128X:$src0)),
          (VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
                           (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
def : Pat<(vselect_mask (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
                        immAllZerosV),
          (VMOVDDUPZ128rrkz VK2WM:$mask, (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - Unpack Instructions
//===----------------------------------------------------------------------===//

let Uses = []<Register>, mayRaiseFPException = 0 in {
defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, X86Unpckh, HasAVX512,
                                 SchedWriteFShuffleSizes, 0, 1>;
defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, X86Unpckl, HasAVX512,
                                 SchedWriteFShuffleSizes>;
}

defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
                                       SchedWriteShuffle, HasBWI>;
defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh,
                                       SchedWriteShuffle, HasBWI>;
defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl,
                                       SchedWriteShuffle, HasBWI>;
defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh,
                                       SchedWriteShuffle, HasBWI>;

defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl,
                                       SchedWriteShuffle, HasAVX512>;
defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh,
                                       SchedWriteShuffle, HasAVX512>;
defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl,
                                        SchedWriteShuffle, HasAVX512>;
defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh,
                                        SchedWriteShuffle, HasAVX512>;

//===----------------------------------------------------------------------===//
// AVX-512 - Extract & Insert Integer Instructions
//===----------------------------------------------------------------------===//

multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   X86VectorVTInfo _> {
  def mr : AVX512Ii8<opc, MRMDestMem, (outs),
              (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              [(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), timm:$src2))),
                       addr:$dst)]>,
              EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecExtractSt]>;
}

multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst),
                       (ins _.RC:$src1, u8imm:$src2),
                       OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set GR32orGR64:$dst,
                             (X86pextrb (_.VT _.RC:$src1), timm:$src2))]>,
                       EVEX, TAPD, Sched<[WriteVecExtract]>;

    defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD;
  }
}

multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
                       (ins _.RC:$src1, u8imm:$src2),
                       OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set GR32orGR64:$dst,
                             (X86pextrw (_.VT _.RC:$src1), timm:$src2))]>,
                       EVEX, PD, Sched<[WriteVecExtract]>;

    let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
    def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
                           (ins _.RC:$src1, u8imm:$src2),
                           OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
                           EVEX, TAPD, FoldGenData<NAME#rr>,
                           Sched<[WriteVecExtract]>;

    defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD;
  }
}

multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
                                 RegisterClass GRC> {
  let Predicates = [HasDQI] in {
    def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst),
                       (ins _.RC:$src1, u8imm:$src2),
                       OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set GRC:$dst,
                           (extractelt (_.VT _.RC:$src1), imm:$src2))]>,
                       EVEX, TAPD, Sched<[WriteVecExtract]>;

    def mr : AVX512Ii8<0x16, MRMDestMem, (outs),
                       (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
                       OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(store (extractelt (_.VT _.RC:$src1),
                                           imm:$src2), addr:$dst)]>,
                       EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD,
                       Sched<[WriteVecExtractSt]>;
  }
}

defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>, VEX_WIG;
defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>, VEX_WIG;
defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, VEX_W;

multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86VectorVTInfo _, PatFrag LdFrag,
                               SDPatternOperator immoperator> {
  def rm : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst),
      (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
      OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set _.RC:$dst,
          (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), immoperator:$src3)))]>,
      EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
      Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
}

multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86VectorVTInfo _, PatFrag LdFrag> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
        (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
        OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
        [(set _.RC:$dst,
            (OpNode _.RC:$src1, GR32orGR64:$src2, timm:$src3))]>, EVEX_4V,
        Sched<[WriteVecInsert]>;

    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag, timm>;
  }
}

multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
                                X86VectorVTInfo _, RegisterClass GRC> {
  let Predicates = [HasDQI] in {
    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
        (ins _.RC:$src1, GRC:$src2, u8imm:$src3),
        OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
        [(set _.RC:$dst,
            (_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>,
        EVEX_4V, TAPD, Sched<[WriteVecInsert]>;

    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
                                    _.ScalarLdFrag, imm>, TAPD;
  }
}

defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info,
                                     extloadi8>, TAPD, VEX_WIG;
defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info,
                                     extloadi16>, PD, VEX_WIG;
defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, VEX_W;

let Predicates = [HasAVX512, NoBWI] in {
  def : Pat<(X86pinsrb VR128:$src1,
                       (i32 (anyext (i8 (bitconvert v8i1:$src2)))),
                       timm:$src3),
            (VPINSRBrr VR128:$src1, (i32 (COPY_TO_REGCLASS VK8:$src2, GR32)),
                       timm:$src3)>;
}

let Predicates = [HasBWI] in {
  def : Pat<(X86pinsrb VR128:$src1, (i32 (anyext (i8 GR8:$src2))), timm:$src3),
            (VPINSRBZrr VR128:$src1, (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
                                                    GR8:$src2, sub_8bit), timm:$src3)>;
  def : Pat<(X86pinsrb VR128:$src1,
                       (i32 (anyext (i8 (bitconvert v8i1:$src2)))),
                       timm:$src3),
            (VPINSRBZrr VR128:$src1, (i32 (COPY_TO_REGCLASS VK8:$src2, GR32)),
                        timm:$src3)>;
}

// Always select FP16 instructions if available.
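// With BWI but no AVX512-FP16, f16 values are moved through the integer
// pipeline using VPINSRW/VPEXTRW. AddedComplexity = -10 keeps these patterns
// from beating the native FP16 load/store patterns when FP16 is available.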
let Predicates = [HasBWI], AddedComplexity = -10 in {
  def : Pat<(f16 (load addr:$src)), (COPY_TO_REGCLASS (VPINSRWZrm (v8i16 (IMPLICIT_DEF)), addr:$src, 0), FR16X)>;
  def : Pat<(store f16:$src, addr:$dst), (VPEXTRWZmr addr:$dst, (v8i16 (COPY_TO_REGCLASS FR16:$src, VR128)), 0)>;
  def : Pat<(i16 (bitconvert f16:$src)), (EXTRACT_SUBREG (VPEXTRWZrr (v8i16 (COPY_TO_REGCLASS FR16X:$src, VR128X)), 0), sub_16bit)>;
  def : Pat<(f16 (bitconvert i16:$src)), (COPY_TO_REGCLASS (VPINSRWZrr (v8i16 (IMPLICIT_DEF)), (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit), 0), FR16X)>;
}

//===----------------------------------------------------------------------===//
// VSHUFPS - VSHUFPD Operations
//===----------------------------------------------------------------------===//

multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_FP>{
  defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp,
                                    SchedWriteFShuffle>,
                                    EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
                                    AVX512AIi8Base, EVEX_4V;
}

defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_f32_info>, PS;
defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_f64_info>, PD, VEX_W;

//===----------------------------------------------------------------------===//
// AVX-512 - Byte shift Left/Right
//===----------------------------------------------------------------------===//

multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
                               Format MRMm, string OpcodeStr,
                               X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  def ri : AVX512<opc, MRMr,
             (outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 timm:$src2))))]>,
             Sched<[sched]>;
  def mi : AVX512<opc, MRMm,
           (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
           [(set _.RC:$dst,(_.VT (OpNode
                                  (_.VT (bitconvert (_.LdFrag addr:$src1))),
                                  (i8 timm:$src2))))]>,
           Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
                                   Format MRMm, string OpcodeStr,
                                   X86SchedWriteWidths sched, Predicate prd>{
  let Predicates = [prd] in
    defm Z : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                 sched.ZMM, v64i8_info>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                    sched.YMM, v32i8x_info>, EVEX_V256;
    defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                    sched.XMM, v16i8x_info>, EVEX_V128;
  }
}
defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq",
                                       SchedWriteShuffle, HasBWI>,
                                       AVX512PDIi8Base, EVEX_4V, VEX_WIG;
defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq",
                                       SchedWriteShuffle, HasBWI>,
                                       AVX512PDIi8Base, EVEX_4V, VEX_WIG;
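// Note: as with the VEX encodings, the 256-bit and 512-bit forms of these
// byte shifts shift each 128-bit lane independently rather than the whole
// vector.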
multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
                                string OpcodeStr, X86FoldableSchedWrite sched,
                                X86VectorVTInfo _dst, X86VectorVTInfo _src> {
  let isCommutable = 1 in
  def rr : AVX512BI<opc, MRMSrcReg,
             (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.RC:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _dst.RC:$dst,(_dst.VT
                                 (OpNode (_src.VT _src.RC:$src1),
                                         (_src.VT _src.RC:$src2))))]>,
             Sched<[sched]>;
  def rm : AVX512BI<opc, MRMSrcMem,
             (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.MemOp:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _dst.RC:$dst,(_dst.VT
                                 (OpNode (_src.VT _src.RC:$src1),
                                         (_src.VT (bitconvert
                                                   (_src.LdFrag addr:$src2))))))]>,
             Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
                                    string OpcodeStr, X86SchedWriteWidths sched,
                                    Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.ZMM,
                                  v8i64_info, v64i8_info>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.YMM,
                                     v4i64x_info, v32i8x_info>, EVEX_V256;
    defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.XMM,
                                     v2i64x_info, v16i8x_info>, EVEX_V128;
  }
}

defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
                                        SchedWritePSADBW, HasBWI>, EVEX_4V, VEX_WIG;

// Transforms to swizzle an immediate to enable better matching when
// memory operand isn't in the right place.
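// The immediate is an 8-entry truth table: bit i of the immediate is the
// result produced for input bits (src0, src1, src2) = (i>>2 & 1, i>>1 & 1,
// i & 1). Reordering the operands therefore permutes the index bits, which
// the transforms below implement as fixed swaps/moves of immediate bits.
// For example, imm 0xCA computes "src0 ? src1 : src2"; swapping operand 0
// and operand 2 (VPTERNLOG321_imm8) turns it into 0xD8, i.e.
// "src2 ? src1 : src0", the same function of the original inputs.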
def VPTERNLOG321_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by swapping operand 0 and operand 2.
  uint8_t Imm = N->getZExtValue();
  // Swap bits 1/4 and 3/6.
  uint8_t NewImm = Imm & 0xa5;
  if (Imm & 0x02) NewImm |= 0x10;
  if (Imm & 0x10) NewImm |= 0x02;
  if (Imm & 0x08) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG213_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by swapping operand 0 and operand 1.
  uint8_t Imm = N->getZExtValue();
  // Swap bits 2/4 and 3/5.
  uint8_t NewImm = Imm & 0xc3;
  if (Imm & 0x04) NewImm |= 0x10;
  if (Imm & 0x10) NewImm |= 0x04;
  if (Imm & 0x08) NewImm |= 0x20;
  if (Imm & 0x20) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG132_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2.
  uint8_t Imm = N->getZExtValue();
  // Swap bits 1/2 and 5/6.
  uint8_t NewImm = Imm & 0x99;
  if (Imm & 0x02) NewImm |= 0x04;
  if (Imm & 0x04) NewImm |= 0x02;
  if (Imm & 0x20) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x20;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG231_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by moving operand 1 to the end.
  uint8_t Imm = N->getZExtValue();
  // Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5
  uint8_t NewImm = Imm & 0x81;
  if (Imm & 0x02) NewImm |= 0x04;
  if (Imm & 0x04) NewImm |= 0x10;
  if (Imm & 0x08) NewImm |= 0x40;
  if (Imm & 0x10) NewImm |= 0x02;
  if (Imm & 0x20) NewImm |= 0x08;
  if (Imm & 0x40) NewImm |= 0x20;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG312_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by moving operand 2 to the beginning.
  uint8_t Imm = N->getZExtValue();
  // Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3
  uint8_t NewImm = Imm & 0x81;
  if (Imm & 0x02) NewImm |= 0x10;
  if (Imm & 0x04) NewImm |= 0x02;
  if (Imm & 0x08) NewImm |= 0x20;
  if (Imm & 0x10) NewImm |= 0x04;
  if (Imm & 0x20) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;

multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          X86FoldableSchedWrite sched, X86VectorVTInfo _,
                          string Name>{
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
                      OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_.VT _.RC:$src3),
                              (i8 timm:$src4)), 1, 1>,
                      AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
  defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
                      OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_.VT (bitconvert (_.LdFrag addr:$src3))),
                              (i8 timm:$src4)), 1, 0>,
                      AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                      Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
                      OpcodeStr, "$src4, ${src3}"#_.BroadcastStr#", $src2",
                      "$src2, ${src3}"#_.BroadcastStr#", $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_.VT (_.BroadcastLdFrag addr:$src3)),
                              (i8 timm:$src4)), 1, 0>, EVEX_B,
                      AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                      Sched<[sched.Folded, sched.ReadAfterFold]>;
  }// Constraints = "$src1 = $dst"

  // Additional patterns for matching passthru operand in other positions.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 timm:$src4))>;

  // Additional patterns for matching zero masking with loads in other
  // positions.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, (i8 timm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
  // Additional patterns for matching masked loads with different
  // operand orders.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1,
                    (bitconvert (_.LdFrag addr:$src3)), (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src1, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;

  // Additional patterns for matching zero masking with broadcasts in other
  // positions.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
             _.KRCWM:$mask, _.RC:$src2, addr:$src3,
             (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src1,
                    (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src2, (i8 timm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
             _.KRCWM:$mask, _.RC:$src2, addr:$src3,
             (VPTERNLOG132_imm8 timm:$src4))>;
  // Additional patterns for matching masked broadcasts with different
  // operand orders.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src1, (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src2, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1,
                    (_.BroadcastLdFrag addr:$src3),
                    (i8 timm:$src4)), _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src2,
                    (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src1, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;
}

multiclass avx512_common_ternlog<string OpcodeStr, X86SchedWriteWidths sched,
                                 AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in
    defm Z : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.ZMM,
                            _.info512, NAME>, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.XMM,
                               _.info128, NAME>, EVEX_V128;
    defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.YMM,
                               _.info256, NAME>, EVEX_V256;
  }
}

defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SchedWriteVecALU,
                                        avx512vl_i32_info>;
defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU,
                                        avx512vl_i64_info>, VEX_W;

// Patterns to implement vnot using vpternlog instead of creating all ones
// using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen
// so that the result is only dependent on src0. But we use the same source
// for all operands to prevent a false dependency.
// TODO: We should maybe have a more generalized algorithm for folding to
// vpternlog.
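// (Imm 15 = 0b00001111: bit i of the immediate is set exactly when bit 2 of
// the index i is clear, so the result is ~src0 regardless of src1/src2.)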
let Predicates = [HasAVX512] in {
  def : Pat<(v64i8 (vnot VR512:$src)),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
  def : Pat<(v32i16 (vnot VR512:$src)),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
  def : Pat<(v16i32 (vnot VR512:$src)),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
  def : Pat<(v8i64 (vnot VR512:$src)),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
}

let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v16i8 (vnot VR128X:$src)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (i8 15)), sub_xmm)>;
  def : Pat<(v8i16 (vnot VR128X:$src)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (i8 15)), sub_xmm)>;
  def : Pat<(v4i32 (vnot VR128X:$src)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (i8 15)), sub_xmm)>;
  def : Pat<(v2i64 (vnot VR128X:$src)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (i8 15)), sub_xmm)>;

  def : Pat<(v32i8 (vnot VR256X:$src)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (i8 15)), sub_ymm)>;
  def : Pat<(v16i16 (vnot VR256X:$src)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (i8 15)), sub_ymm)>;
  def : Pat<(v8i32 (vnot VR256X:$src)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (i8 15)), sub_ymm)>;
  def : Pat<(v4i64 (vnot VR256X:$src)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (i8 15)), sub_ymm)>;
}

let Predicates = [HasVLX] in {
  def : Pat<(v16i8 (vnot VR128X:$src)),
            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
  def : Pat<(v8i16 (vnot VR128X:$src)),
            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
  def : Pat<(v4i32 (vnot VR128X:$src)),
            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
  def : Pat<(v2i64 (vnot VR128X:$src)),
            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;

  def : Pat<(v32i8 (vnot VR256X:$src)),
            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
  def : Pat<(v16i16 (vnot VR256X:$src)),
            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
  def : Pat<(v8i32 (vnot VR256X:$src)),
            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
  def : Pat<(v4i64 (vnot VR256X:$src)),
            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - FixupImm
//===----------------------------------------------------------------------===//

multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                  X86VectorVTInfo TblVT>{
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
      Uses = [MXCSR], mayRaiseFPException = 1 in {
    defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                        OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                        (X86VFixupimm (_.VT _.RC:$src1),
                                      (_.VT _.RC:$src2),
                                      (TblVT.VT _.RC:$src3),
                                      (i32 timm:$src4))>, Sched<[sched]>;
    defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                        (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
                        OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                        (X86VFixupimm (_.VT _.RC:$src1),
                                      (_.VT _.RC:$src2),
                                      (TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))),
                                      (i32 timm:$src4))>,
                        Sched<[sched.Folded, sched.ReadAfterFold]>;
    defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                        (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
                        OpcodeStr#_.Suffix, "$src4, ${src3}"#_.BroadcastStr#", $src2",
                        "$src2, ${src3}"#_.BroadcastStr#", $src4",
                        (X86VFixupimm (_.VT _.RC:$src1),
                                      (_.VT _.RC:$src2),
                                      (TblVT.VT (TblVT.BroadcastLdFrag addr:$src3)),
                                      (i32 timm:$src4))>,
                        EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  } // Constraints = "$src1 = $dst"
}

multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
                                      X86FoldableSchedWrite sched,
                                      X86VectorVTInfo _, X86VectorVTInfo TblVT>
    : avx512_fixupimm_packed<opc, OpcodeStr, sched, _, TblVT> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
  defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                      OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2",
                      "$src2, $src3, {sae}, $src4",
                      (X86VFixupimmSAE (_.VT _.RC:$src1),
                                       (_.VT _.RC:$src2),
                                       (TblVT.VT _.RC:$src3),
                                       (i32 timm:$src4))>,
                      EVEX_B, Sched<[sched]>;
  }
}
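// Note: the third source of vfixupimm is an integer table (TblVT) describing
// how special inputs (NaN, +/-0, +/-Inf, ...) are fixed up. The rrib forms
// carry {sae}, which suppresses new FP exceptions, so unlike the other forms
// they are not marked mayRaiseFPException.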
multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                  X86VectorVTInfo _src3VT> {
  let Constraints = "$src1 = $dst", Predicates = [HasAVX512],
      ExeDomain = _.ExeDomain in {
    defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                        OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                        (X86VFixupimms (_.VT _.RC:$src1),
                                       (_.VT _.RC:$src2),
                                       (_src3VT.VT _src3VT.RC:$src3),
                                       (i32 timm:$src4))>, Sched<[sched]>, SIMD_EXC;
    let Uses = [MXCSR] in
    defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                        OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2",
                        "$src2, $src3, {sae}, $src4",
                        (X86VFixupimmSAEs (_.VT _.RC:$src1),
                                          (_.VT _.RC:$src2),
                                          (_src3VT.VT _src3VT.RC:$src3),
                                          (i32 timm:$src4))>,
                        EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
    defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                        (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
                        OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                        (X86VFixupimms (_.VT _.RC:$src1),
                                       (_.VT _.RC:$src2),
                                       (_src3VT.VT (scalar_to_vector
                                                    (_src3VT.ScalarLdFrag addr:$src3))),
                                       (i32 timm:$src4))>,
                        Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  }
}

multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched,
                                      AVX512VLVectorVTInfo _Vec,
                                      AVX512VLVectorVTInfo _Tbl> {
  let Predicates = [HasAVX512] in
    defm Z : avx512_fixupimm_packed_sae<0x54, "vfixupimm", sched.ZMM,
                                        _Vec.info512, _Tbl.info512>, AVX512AIi8Base,
                                        EVEX_4V, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.XMM,
                                       _Vec.info128, _Tbl.info128>, AVX512AIi8Base,
                                       EVEX_4V, EVEX_V128;
    defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.YMM,
                                       _Vec.info256, _Tbl.info256>, AVX512AIi8Base,
                                       EVEX_4V, EVEX_V256;
  }
}

defm VFIXUPIMMSSZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
                                           SchedWriteFAdd.Scl, f32x_info, v4i32x_info>,
                          AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
defm VFIXUPIMMSDZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
                                           SchedWriteFAdd.Scl, f64x_info, v2i64x_info>,
                          AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f32_info,
                         avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f64_info,
                         avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;

// Patterns used to select SSE scalar fp arithmetic instructions from
// either:
//
// (1) a scalar fp operation followed by a blend
//
// The effect is that the backend no longer emits unnecessary vector
// insert instructions immediately after SSE scalar fp instructions
// like addss or mulss.
//
// For example, given the following code:
//   __m128 foo(__m128 A, __m128 B) {
//     A[0] += B[0];
//     return A;
//   }
//
// Previously we generated:
//   addss %xmm0, %xmm1
//   movss %xmm1, %xmm0
//
// We now generate:
//   addss %xmm1, %xmm0
//
// (2) a vector packed single/double fp operation followed by a vector insert
//
// The effect is that the backend converts the packed fp instruction
// followed by a vector insert into a single SSE scalar fp instruction.
//
// For example, given the following code:
//   __m128 foo(__m128 A, __m128 B) {
//     __m128 C = A + B;
//     return (__m128) {C[0], A[1], A[2], A[3]};
//   }
//
// Previously we generated:
//   addps %xmm0, %xmm1
//   movss %xmm1, %xmm0
//
// We now generate:
//   addss %xmm1, %xmm0

// TODO: Some canonicalization in lowering would simplify the number of
// patterns we have to try to match.
multiclass AVX512_scalar_math_fp_patterns<SDPatternOperator Op, SDNode MaskedOp,
                                          string OpcPrefix, SDNode MoveNode,
                                          X86VectorVTInfo _, PatLeaf ZeroFP> {
  let Predicates = [HasAVX512] in {
    // extracted scalar math op with insert via movss
    def : Pat<(MoveNode
               (_.VT VR128X:$dst),
               (_.VT (scalar_to_vector
                      (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
                          _.FRC:$src)))),
              (!cast<Instruction>("V"#OpcPrefix#"Zrr_Int") _.VT:$dst,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src, VR128X)))>;
    def : Pat<(MoveNode
               (_.VT VR128X:$dst),
               (_.VT (scalar_to_vector
                      (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
                          (_.ScalarLdFrag addr:$src))))),
              (!cast<Instruction>("V"#OpcPrefix#"Zrm_Int") _.VT:$dst, addr:$src)>;

    // extracted masked scalar math op with insert via movss
    def : Pat<(MoveNode (_.VT VR128X:$src1),
               (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (MaskedOp (_.EltVT
                            (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                           _.FRC:$src2),
                 _.FRC:$src0))),
              (!cast<Instruction>("V"#OpcPrefix#"Zrr_Intk")
               (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
               VK1WM:$mask, _.VT:$src1,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
    def : Pat<(MoveNode (_.VT VR128X:$src1),
               (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (MaskedOp (_.EltVT
                            (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                           (_.ScalarLdFrag addr:$src2)),
                 _.FRC:$src0))),
              (!cast<Instruction>("V"#OpcPrefix#"Zrm_Intk")
               (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
               VK1WM:$mask, _.VT:$src1, addr:$src2)>;

    // extracted zero-masked scalar math op with insert via movss
    def : Pat<(MoveNode (_.VT VR128X:$src1),
               (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (MaskedOp (_.EltVT
                            (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                           _.FRC:$src2), (_.EltVT ZeroFP)))),
              (!cast<I>("V"#OpcPrefix#"Zrr_Intkz")
               VK1WM:$mask, _.VT:$src1,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
    def : Pat<(MoveNode (_.VT VR128X:$src1),
               (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (MaskedOp (_.EltVT
                            (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                           (_.ScalarLdFrag addr:$src2)), (_.EltVT ZeroFP)))),
              (!cast<I>("V"#OpcPrefix#"Zrm_Intkz") VK1WM:$mask, _.VT:$src1, addr:$src2)>;
  }
}

defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSS", X86Movss, v4f32x_info, fp32imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSS", X86Movss, v4f32x_info, fp32imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSS", X86Movss, v4f32x_info, fp32imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSS", X86Movss, v4f32x_info, fp32imm0>;

defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSD", X86Movsd, v2f64x_info, fp64imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSD", X86Movsd, v2f64x_info, fp64imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSD", X86Movsd, v2f64x_info, fp64imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSD", X86Movsd, v2f64x_info, fp64imm0>;

defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSH", X86Movsh, v8f16x_info, fp16imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSH", X86Movsh, v8f16x_info, fp16imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSH", X86Movsh, v8f16x_info, fp16imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSH", X86Movsh, v8f16x_info, fp16imm0>;

multiclass AVX512_scalar_unary_math_patterns<SDPatternOperator OpNode, string OpcPrefix,
                                             SDNode Move, X86VectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    def : Pat<(_.VT (Move _.VT:$dst,
                     (scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))),
              (!cast<Instruction>("V"#OpcPrefix#"Zr_Int") _.VT:$dst, _.VT:$src)>;
  }
}

defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSS", X86Movss, v4f32x_info>;
defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSD", X86Movsd, v2f64x_info>;
defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSH", X86Movsh, v8f16x_info>;

//===----------------------------------------------------------------------===//
// AES instructions
//===----------------------------------------------------------------------===//

multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> {
  let Predicates = [HasVLX, HasVAES] in {
    defm Z128 : AESI_binop_rm_int<Op, OpStr,
                                  !cast<Intrinsic>(IntPrefix),
                                  loadv2i64, 0, VR128X, i128mem>,
                                  EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V128, VEX_WIG;
    defm Z256 : AESI_binop_rm_int<Op, OpStr,
                                  !cast<Intrinsic>(IntPrefix#"_256"),
                                  loadv4i64, 0, VR256X, i256mem>,
                                  EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512, HasVAES] in
    defm Z : AESI_binop_rm_int<Op, OpStr,
                               !cast<Intrinsic>(IntPrefix#"_512"),
                               loadv8i64, 0, VR512, i512mem>,
                               EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_WIG;
}

defm VAESENC     : avx512_vaes<0xDC, "vaesenc", "int_x86_aesni_aesenc">;
defm VAESENCLAST : avx512_vaes<0xDD, "vaesenclast", "int_x86_aesni_aesenclast">;
defm VAESDEC     : avx512_vaes<0xDE, "vaesdec", "int_x86_aesni_aesdec">;
defm VAESDECLAST : avx512_vaes<0xDF, "vaesdeclast", "int_x86_aesni_aesdeclast">;

//===----------------------------------------------------------------------===//
// PCLMUL instructions - Carry less multiplication
//===----------------------------------------------------------------------===//

let Predicates = [HasAVX512, HasVPCLMULQDQ] in
defm VPCLMULQDQZ : vpclmulqdq<VR512, i512mem, loadv8i64, int_x86_pclmulqdq_512>,
                              EVEX_4V, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_WIG;

let Predicates = [HasVLX, HasVPCLMULQDQ] in {
defm VPCLMULQDQZ128 : vpclmulqdq<VR128X, i128mem, loadv2i64, int_x86_pclmulqdq>,
                                 EVEX_4V, EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_WIG;

defm VPCLMULQDQZ256: vpclmulqdq<VR256X, i256mem, loadv4i64,
                                int_x86_pclmulqdq_256>, EVEX_4V, EVEX_V256,
                                EVEX_CD8<64, CD8VF>, VEX_WIG;
}

// Aliases
defm : vpclmulqdq_aliases<"VPCLMULQDQZ", VR512, i512mem>;
defm : vpclmulqdq_aliases<"VPCLMULQDQZ128", VR128X, i128mem>;
defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>;
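// (vpclmulqdq_aliases is shared with the VEX forms and is expected to add the
// immediate-specific mnemonics, e.g. vpclmullqlqdq, for these EVEX encodings
// as well.)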
//===----------------------------------------------------------------------===//
// VBMI2
//===----------------------------------------------------------------------===//

multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
  let Constraints = "$src1 = $dst",
      ExeDomain = VTI.ExeDomain in {
    defm r: AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
                                 (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
                                 "$src3, $src2", "$src2, $src3",
                                 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>,
                                 T8PD, EVEX_4V, Sched<[sched]>;
    defm m: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                                 (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
                                 "$src3, $src2", "$src2, $src3",
                                 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
                                          (VTI.VT (VTI.LdFrag addr:$src3))))>,
                                 T8PD, EVEX_4V,
                                 Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
                               X86FoldableSchedWrite sched, X86VectorVTInfo VTI>
    : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched, VTI> {
  let Constraints = "$src1 = $dst",
      ExeDomain = VTI.ExeDomain in
    defm mb: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                                  (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr,
                                  "${src3}"#VTI.BroadcastStr#", $src2",
                                  "$src2, ${src3}"#VTI.BroadcastStr,
                                  (OpNode VTI.RC:$src1, VTI.RC:$src2,
                                   (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
                                  T8PD, EVEX_4V, EVEX_B,
                                  Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode,
                                     X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasVBMI2] in
    defm Z : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
             EVEX_V512;
  let Predicates = [HasVBMI2, HasVLX] in {
    defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
                EVEX_V256;
    defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
                EVEX_V128;
  }
}

multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode,
                                      X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasVBMI2] in
    defm Z : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
             EVEX_V512;
  let Predicates = [HasVBMI2, HasVLX] in {
    defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
                EVEX_V256;
    defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
                EVEX_V128;
  }
}
multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix,
                           SDNode OpNode, X86SchedWriteWidths sched> {
  defm W : VBMI2_shift_var_rm_common<wOp, Prefix#"w", OpNode, sched,
                                     avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>;
  defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix#"d", OpNode, sched,
                                      avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
  defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix#"q", OpNode, sched,
                                      avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
}

multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix,
                           SDNode OpNode, X86SchedWriteWidths sched> {
  defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix#"w", sched,
                                     avx512vl_i16_info, avx512vl_i16_info, HasVBMI2>,
           VEX_W, EVEX_CD8<16, CD8VF>;
  defm D : avx512_common_3Op_imm8<Prefix#"d", avx512vl_i32_info, dqOp,
           OpNode, sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
  defm Q : avx512_common_3Op_imm8<Prefix#"q", avx512vl_i64_info, dqOp, OpNode,
           sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
}

// Concat & Shift
defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SchedWriteVecIMul>;
defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SchedWriteVecIMul>;
defm VPSHLD  : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SchedWriteVecIMul>;
defm VPSHRD  : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SchedWriteVecIMul>;

// Compress
defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", WriteVarShuffle256,
                                         avx512vl_i8_info, HasVBMI2>, EVEX,
                                         NotMemoryFoldable;
defm VPCOMPRESSW : compress_by_elt_width<0x63, "vpcompressw", WriteVarShuffle256,
                                         avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W,
                                         NotMemoryFoldable;
// Expand
defm VPEXPANDB : expand_by_elt_width<0x62, "vpexpandb", WriteVarShuffle256,
                                     avx512vl_i8_info, HasVBMI2>, EVEX;
defm VPEXPANDW : expand_by_elt_width<0x62, "vpexpandw", WriteVarShuffle256,
                                     avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W;

//===----------------------------------------------------------------------===//
// VNNI
//===----------------------------------------------------------------------===//

let Constraints = "$src1 = $dst" in
multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
                    X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
                    bit IsCommutable> {
  let ExeDomain = VTI.ExeDomain in {
  defm r  : AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
                                 (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
                                 "$src3, $src2", "$src2, $src3",
                                 (VTI.VT (OpNode VTI.RC:$src1,
                                          VTI.RC:$src2, VTI.RC:$src3)),
                                 IsCommutable, IsCommutable>,
                                 EVEX_4V, T8PD, Sched<[sched]>;
  defm m  : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                                 (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
                                 "$src3, $src2", "$src2, $src3",
                                 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
                                          (VTI.VT (VTI.LdFrag addr:$src3))))>,
                                 EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD,
                                 Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm mb : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                                 (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
                                 OpStr, "${src3}"#VTI.BroadcastStr#", $src2",
                                 "$src2, ${src3}"#VTI.BroadcastStr,
                                 (OpNode VTI.RC:$src1, VTI.RC:$src2,
                                  (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
                                 EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B,
                                 T8PD, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode,
                       X86SchedWriteWidths sched, bit IsCommutable> {
  let Predicates = [HasVNNI] in
    defm Z : VNNI_rmb<Op, OpStr, OpNode, sched.ZMM, v16i32_info,
                      IsCommutable>, EVEX_V512;
  let Predicates = [HasVNNI, HasVLX] in {
    defm Z256 : VNNI_rmb<Op, OpStr, OpNode, sched.YMM, v8i32x_info,
                         IsCommutable>, EVEX_V256;
    defm Z128 : VNNI_rmb<Op, OpStr, OpNode, sched.XMM, v4i32x_info,
                         IsCommutable>, EVEX_V128;
  }
}

// FIXME: Is there a better scheduler class for VPDP?
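// vpdpbusd/vpdpbusds multiply unsigned bytes of one source with the
// corresponding signed bytes of the other, so the operands are not
// interchangeable; vpdpwssd/vpdpwssds multiply signed words with signed
// words, which is why only the word forms are marked commutable below.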
defm VPDPBUSD   : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SchedWriteVecIMul, 0>;
defm VPDPBUSDS  : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SchedWriteVecIMul, 0>;
defm VPDPWSSD   : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SchedWriteVecIMul, 1>;
defm VPDPWSSDS  : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SchedWriteVecIMul, 1>;

// Patterns to match VPDPWSSD from existing instructions/intrinsics.
let Predicates = [HasVNNI] in {
  def : Pat<(v16i32 (add VR512:$src1,
                         (X86vpmaddwd_su VR512:$src2, VR512:$src3))),
            (VPDPWSSDZr VR512:$src1, VR512:$src2, VR512:$src3)>;
  def : Pat<(v16i32 (add VR512:$src1,
                         (X86vpmaddwd_su VR512:$src2, (load addr:$src3)))),
            (VPDPWSSDZm VR512:$src1, VR512:$src2, addr:$src3)>;
}
let Predicates = [HasVNNI, HasVLX] in {
  def : Pat<(v8i32 (add VR256X:$src1,
                        (X86vpmaddwd_su VR256X:$src2, VR256X:$src3))),
            (VPDPWSSDZ256r VR256X:$src1, VR256X:$src2, VR256X:$src3)>;
  def : Pat<(v8i32 (add VR256X:$src1,
                        (X86vpmaddwd_su VR256X:$src2, (load addr:$src3)))),
            (VPDPWSSDZ256m VR256X:$src1, VR256X:$src2, addr:$src3)>;
  def : Pat<(v4i32 (add VR128X:$src1,
                        (X86vpmaddwd_su VR128X:$src2, VR128X:$src3))),
            (VPDPWSSDZ128r VR128X:$src1, VR128X:$src2, VR128X:$src3)>;
  def : Pat<(v4i32 (add VR128X:$src1,
                        (X86vpmaddwd_su VR128X:$src2, (load addr:$src3)))),
            (VPDPWSSDZ128m VR128X:$src1, VR128X:$src2, addr:$src3)>;
}

//===----------------------------------------------------------------------===//
// Bit Algorithms
//===----------------------------------------------------------------------===//

// FIXME: Is there a better scheduler class for VPOPCNTB/VPOPCNTW?
defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, SchedWriteVecALU,
                                   avx512vl_i8_info, HasBITALG>;
defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SchedWriteVecALU,
                                   avx512vl_i16_info, HasBITALG>, VEX_W;

defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>;
defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>;

def X86Vpshufbitqmb_su : PatFrag<(ops node:$src1, node:$src2),
                                 (X86Vpshufbitqmb node:$src1, node:$src2), [{
  return N->hasOneUse();
}]>;

multiclass VPSHUFBITQMB_rm<X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
  defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst),
                                (ins VTI.RC:$src1, VTI.RC:$src2),
                                "vpshufbitqmb",
                                "$src2, $src1", "$src1, $src2",
                                (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
                                 (VTI.VT VTI.RC:$src2)),
                                (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
                                 (VTI.VT VTI.RC:$src2))>, EVEX_4V, T8PD,
                                Sched<[sched]>;
  defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst),
                                (ins VTI.RC:$src1, VTI.MemOp:$src2),
                                "vpshufbitqmb",
                                "$src2, $src1", "$src1, $src2",
                                (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
                                 (VTI.VT (VTI.LdFrag addr:$src2))),
                                (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
                                 (VTI.VT (VTI.LdFrag addr:$src2)))>,
                                EVEX_4V, EVEX_CD8<8, CD8VF>, T8PD,
                                Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass VPSHUFBITQMB_common<X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasBITALG] in
    defm Z : VPSHUFBITQMB_rm<sched.ZMM, VTI.info512>, EVEX_V512;
  let Predicates = [HasBITALG, HasVLX] in {
    defm Z256 : VPSHUFBITQMB_rm<sched.YMM, VTI.info256>, EVEX_V256;
    defm Z128 : VPSHUFBITQMB_rm<sched.XMM, VTI.info128>, EVEX_V128;
  }
}

// FIXME: Is there a better scheduler class for VPSHUFBITQMB?
defm VPSHUFBITQMB : VPSHUFBITQMB_common<SchedWriteVecIMul, avx512vl_i8_info>;

//===----------------------------------------------------------------------===//
// GFNI
//===----------------------------------------------------------------------===//

multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
                                   X86SchedWriteWidths sched> {
  let Predicates = [HasGFNI, HasAVX512] in
    defm Z : avx512_binop_rm<Op, OpStr, OpNode, v64i8_info, sched.ZMM, 1>,
             EVEX_V512;
  let Predicates = [HasGFNI, HasVLX] in {
    defm Z256 : avx512_binop_rm<Op, OpStr, OpNode, v32i8x_info, sched.YMM, 1>,
                EVEX_V256;
    defm Z128 : avx512_binop_rm<Op, OpStr, OpNode, v16i8x_info, sched.XMM, 1>,
                EVEX_V128;
  }
}

defm VGF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb,
                                          SchedWriteVecALU>,
                                          EVEX_CD8<8, CD8VF>, T8PD;

multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
                                      X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
                                      X86VectorVTInfo BcstVTI>
    : avx512_3Op_rm_imm8<Op, OpStr, OpNode, sched, VTI, VTI> {
  let ExeDomain = VTI.ExeDomain in
    defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                                (ins VTI.RC:$src1, VTI.ScalarMemOp:$src2, u8imm:$src3),
                                OpStr, "$src3, ${src2}"#BcstVTI.BroadcastStr#", $src1",
                                "$src1, ${src2}"#BcstVTI.BroadcastStr#", $src3",
                                (OpNode (VTI.VT VTI.RC:$src1),
                                 (bitconvert (BcstVTI.VT (X86VBroadcastld64 addr:$src2))),
                                 (i8 timm:$src3))>, EVEX_B,
                                Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
                                     X86SchedWriteWidths sched> {
  let Predicates = [HasGFNI, HasAVX512] in
    defm Z : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.ZMM,
                                        v64i8_info, v8i64_info>, EVEX_V512;
  let Predicates = [HasGFNI, HasVLX] in {
    defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.YMM,
                                           v32i8x_info, v4i64x_info>, EVEX_V256;
    defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.XMM,
                                           v16i8x_info, v2i64x_info>, EVEX_V128;
  }
}

defm VGF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb",
                                                   X86GF2P8affineinvqb, SchedWriteVecIMul>,
                                                   EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
defm VGF2P8AFFINEQB    : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb",
                                                   X86GF2P8affineqb, SchedWriteVecIMul>,
                                                   EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;


//===----------------------------------------------------------------------===//
// AVX5124FMAPS
//===----------------------------------------------------------------------===//
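// Note: these 4-iteration FMA instructions read a block of four consecutive
// vector registers starting at $src2 together with a 128-bit memory operand.
// Only the assembly strings are defined here (the patterns are empty); they
// are matched outside this file.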
let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedSingle,
    Constraints = "$src1 = $dst", Uses = [MXCSR], mayRaiseFPException = 1 in {
defm V4FMADDPSrm : AVX512_maskable_3src_in_asm<0x9A, MRMSrcMem, v16f32_info,
                    (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                    "v4fmaddps", "$src3, $src2", "$src2, $src3",
                    []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                    Sched<[SchedWriteFMA.ZMM.Folded]>;

defm V4FNMADDPSrm : AVX512_maskable_3src_in_asm<0xAA, MRMSrcMem, v16f32_info,
                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                     "v4fnmaddps", "$src3, $src2", "$src2, $src3",
                     []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                     Sched<[SchedWriteFMA.ZMM.Folded]>;

defm V4FMADDSSrm : AVX512_maskable_3src_in_asm<0x9B, MRMSrcMem, f32x_info,
                    (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
                    "v4fmaddss", "$src3, $src2", "$src2, $src3",
                    []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
                    Sched<[SchedWriteFMA.Scl.Folded]>;

defm V4FNMADDSSrm : AVX512_maskable_3src_in_asm<0xAB, MRMSrcMem, f32x_info,
                     (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
                     "v4fnmaddss", "$src3, $src2", "$src2, $src3",
                     []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
                     Sched<[SchedWriteFMA.Scl.Folded]>;
}

//===----------------------------------------------------------------------===//
// AVX5124VNNIW
//===----------------------------------------------------------------------===//

let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedInt,
    Constraints = "$src1 = $dst" in {
defm VP4DPWSSDrm : AVX512_maskable_3src_in_asm<0x52, MRMSrcMem, v16i32_info,
                    (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                    "vp4dpwssd", "$src3, $src2", "$src2, $src3",
                    []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                    Sched<[SchedWriteFMA.ZMM.Folded]>;

defm VP4DPWSSDSrm : AVX512_maskable_3src_in_asm<0x53, MRMSrcMem, v16i32_info,
                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                     "vp4dpwssds", "$src3, $src2", "$src2, $src3",
                     []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                     Sched<[SchedWriteFMA.ZMM.Folded]>;
}

let hasSideEffects = 0 in {
  let mayStore = 1, SchedRW = [WriteFStoreX] in
  def MASKPAIR16STORE : PseudoI<(outs), (ins anymem:$dst, VK16PAIR:$src), []>;
  let mayLoad = 1, SchedRW = [WriteFLoadX] in
  def MASKPAIR16LOAD : PseudoI<(outs VK16PAIR:$dst), (ins anymem:$src), []>;
}

//===----------------------------------------------------------------------===//
// VP2INTERSECT
//===----------------------------------------------------------------------===//

multiclass avx512_vp2intersect_modes<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  def rr : I<0x68, MRMSrcReg,
             (outs _.KRPC:$dst),
             (ins _.RC:$src1, _.RC:$src2),
             !strconcat("vp2intersect", _.Suffix,
                        "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.KRPC:$dst, (X86vp2intersect
                                 _.RC:$src1, (_.VT _.RC:$src2)))]>,
             EVEX_4V, T8XD, Sched<[sched]>;

  def rm : I<0x68, MRMSrcMem,
             (outs _.KRPC:$dst),
             (ins _.RC:$src1, _.MemOp:$src2),
             !strconcat("vp2intersect", _.Suffix,
                        "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.KRPC:$dst, (X86vp2intersect
                                 _.RC:$src1, (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>,
             EVEX_4V, T8XD, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[sched.Folded, sched.ReadAfterFold]>;

  def rmb : I<0x68, MRMSrcMem,
              (outs _.KRPC:$dst),
              (ins _.RC:$src1, _.ScalarMemOp:$src2),
              !strconcat("vp2intersect", _.Suffix, "\t{${src2}", _.BroadcastStr,
                         ", $src1, $dst|$dst, $src1, ${src2}", _.BroadcastStr ,"}"),
              [(set _.KRPC:$dst, (X86vp2intersect
                                  _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))]>,
              EVEX_4V, T8XD, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
}
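// Note: vp2intersect writes a pair of mask registers at once (_.KRPC): one
// mask flags the elements of $src1 that also occur in $src2, the other flags
// the elements of $src2 that also occur in $src1.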
multiclass avx512_vp2intersect<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512, HasVP2INTERSECT] in
    defm Z : avx512_vp2intersect_modes<sched.ZMM, _.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVP2INTERSECT, HasVLX] in {
    defm Z256 : avx512_vp2intersect_modes<sched.YMM, _.info256>, EVEX_V256;
    defm Z128 : avx512_vp2intersect_modes<sched.XMM, _.info128>, EVEX_V128;
  }
}

defm VP2INTERSECTD : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i32_info>;
defm VP2INTERSECTQ : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i64_info>, VEX_W;

multiclass avx512_binop_all2<bits<8> opc, string OpcodeStr,
                             X86SchedWriteWidths sched,
                             AVX512VLVectorVTInfo _SrcVTInfo,
                             AVX512VLVectorVTInfo _DstVTInfo,
                             SDNode OpNode, Predicate prd,
                             bit IsCommutable = 0> {
  let Predicates = [prd] in
    defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
                                   _SrcVTInfo.info512, _DstVTInfo.info512,
                                   _SrcVTInfo.info512, IsCommutable>,
                                   EVEX_V512, EVEX_CD8<32, CD8VF>;
  let Predicates = [HasVLX, prd] in {
    defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
                                      _SrcVTInfo.info256, _DstVTInfo.info256,
                                      _SrcVTInfo.info256, IsCommutable>,
                                      EVEX_V256, EVEX_CD8<32, CD8VF>;
    defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
                                      _SrcVTInfo.info128, _DstVTInfo.info128,
                                      _SrcVTInfo.info128, IsCommutable>,
                                      EVEX_V128, EVEX_CD8<32, CD8VF>;
  }
}

let ExeDomain = SSEPackedSingle in
defm VCVTNE2PS2BF16 : avx512_binop_all2<0x72, "vcvtne2ps2bf16",
                                        SchedWriteCvtPD2PS, //FIXME: Should be SchedWriteCvtPS2BF
                                        avx512vl_f32_info, avx512vl_bf16_info,
                                        X86cvtne2ps2bf16, HasBF16, 0>, T8XD;

// Truncate Float to BFloat16
multiclass avx512_cvtps2bf16<bits<8> opc, string OpcodeStr,
                             X86SchedWriteWidths sched> {
  let ExeDomain = SSEPackedSingle in {
    let Predicates = [HasBF16], Uses = []<Register>, mayRaiseFPException = 0 in {
      defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16bf16x_info, v16f32_info,
                              X86cvtneps2bf16, X86cvtneps2bf16, sched.ZMM>, EVEX_V512;
    }
    let Predicates = [HasBF16, HasVLX] in {
      let Uses = []<Register>, mayRaiseFPException = 0 in {
        defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8bf16x_info, v4f32x_info,
                                   null_frag, null_frag, sched.XMM, "{1to4}", "{x}", f128mem,
                                   VK4WM>, EVEX_V128;
        defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8bf16x_info, v8f32x_info,
                                   X86cvtneps2bf16, X86cvtneps2bf16,
                                   sched.YMM, "{1to8}", "{y}">, EVEX_V256;
      }
    } // Predicates = [HasBF16, HasVLX]
  } // ExeDomain = SSEPackedSingle

  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
                   VR128X:$src), 0>;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst,
                   f128mem:$src), 0, "intel">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
                   VR256X:$src), 0>;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst,
                   f256mem:$src), 0, "intel">;
}

defm VCVTNEPS2BF16 : avx512_cvtps2bf16<0x72, "vcvtneps2bf16",
                                       SchedWriteCvtPD2PS>, T8XS,
                                       EVEX_CD8<32, CD8VF>;

let Predicates = [HasBF16, HasVLX] in {
  // Special patterns to allow use of X86mcvtneps2bf16 for masking. Instruction
  // patterns have been disabled with null_frag.
  def : Pat<(v8bf16 (X86cvtneps2bf16 (v4f32 VR128X:$src))),
            (VCVTNEPS2BF16Z128rr VR128X:$src)>;
  def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), (v8bf16 VR128X:$src0),
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src)>;
  def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), v8bf16x_info.ImmAllZerosV,
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rrkz VK4WM:$mask, VR128X:$src)>;

  def : Pat<(v8bf16 (X86cvtneps2bf16 (loadv4f32 addr:$src))),
            (VCVTNEPS2BF16Z128rm addr:$src)>;
  def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), (v8bf16 VR128X:$src0),
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), v8bf16x_info.ImmAllZerosV,
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rmkz VK4WM:$mask, addr:$src)>;

  def : Pat<(v8bf16 (X86cvtneps2bf16 (v4f32
                                      (X86VBroadcastld32 addr:$src)))),
            (VCVTNEPS2BF16Z128rmb addr:$src)>;
  def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
                              (v8bf16 VR128X:$src0), VK4WM:$mask),
            (VCVTNEPS2BF16Z128rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
                              v8bf16x_info.ImmAllZerosV, VK4WM:$mask),
            (VCVTNEPS2BF16Z128rmbkz VK4WM:$mask, addr:$src)>;

  def : Pat<(v8bf16 (int_x86_vcvtneps2bf16128 (v4f32 VR128X:$src))),
            (VCVTNEPS2BF16Z128rr VR128X:$src)>;
  def : Pat<(v8bf16 (int_x86_vcvtneps2bf16128 (loadv4f32 addr:$src))),
            (VCVTNEPS2BF16Z128rm addr:$src)>;

  def : Pat<(v8bf16 (int_x86_vcvtneps2bf16256 (v8f32 VR256X:$src))),
            (VCVTNEPS2BF16Z256rr VR256X:$src)>;
  def : Pat<(v8bf16 (int_x86_vcvtneps2bf16256 (loadv8f32 addr:$src))),
            (VCVTNEPS2BF16Z256rm addr:$src)>;

  def : Pat<(v8bf16 (X86VBroadcastld16 addr:$src)),
            (VPBROADCASTWZ128rm addr:$src)>;
  def : Pat<(v16bf16 (X86VBroadcastld16 addr:$src)),
            (VPBROADCASTWZ256rm addr:$src)>;

  def : Pat<(v8bf16 (X86VBroadcast (v8bf16 VR128X:$src))),
            (VPBROADCASTWZ128rr VR128X:$src)>;
  def : Pat<(v16bf16 (X86VBroadcast (v8bf16 VR128X:$src))),
            (VPBROADCASTWZ256rr VR128X:$src)>;

  // TODO: No scalar broadcast yet, since scalar bf16 is not a legal type so far.
}

let Predicates = [HasBF16] in {
  def : Pat<(v32bf16 (X86VBroadcastld16 addr:$src)),
            (VPBROADCASTWZrm addr:$src)>;

  def : Pat<(v32bf16 (X86VBroadcast (v8bf16 VR128X:$src))),
            (VPBROADCASTWZrr VR128X:$src)>;
  // TODO: No scalar broadcast yet, since scalar bf16 is not a legal type so far.
}

let Constraints = "$src1 = $dst" in {
multiclass avx512_dpbf16ps_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86FoldableSchedWrite sched,
                              X86VectorVTInfo _, X86VectorVTInfo src_v> {
  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                               (ins src_v.RC:$src2, src_v.RC:$src3),
                               OpcodeStr, "$src3, $src2", "$src2, $src3",
                               (_.VT (OpNode _.RC:$src1, src_v.RC:$src2, src_v.RC:$src3))>,
                               EVEX_4V, Sched<[sched]>;

  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                               (ins src_v.RC:$src2, src_v.MemOp:$src3),
                               OpcodeStr, "$src3, $src2", "$src2, $src3",
                               (_.VT (OpNode _.RC:$src1, src_v.RC:$src2,
                                      (src_v.LdFrag addr:$src3)))>, EVEX_4V,
                               Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                                (ins src_v.RC:$src2, f32mem:$src3),
                                OpcodeStr,
                                !strconcat("${src3}", _.BroadcastStr, ", $src2"),
                                !strconcat("$src2, ${src3}", _.BroadcastStr),
                                (_.VT (OpNode _.RC:$src1, src_v.RC:$src2,
                                       (src_v.VT (src_v.BroadcastLdFrag addr:$src3))))>,
                                EVEX_B, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;

}
} // Constraints = "$src1 = $dst"

multiclass avx512_dpbf16ps_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _,
                                 AVX512VLVectorVTInfo src_v, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512,
                                src_v.info512>, EVEX_V512;
  }
  let Predicates = [HasVLX, prd] in {
    defm Z256 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256,
                                   src_v.info256>, EVEX_V256;
    defm Z128 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128,
                                   src_v.info128>, EVEX_V128;
  }
}

let ExeDomain = SSEPackedSingle in
defm VDPBF16PS : avx512_dpbf16ps_sizes<0x52, "vdpbf16ps", X86dpbf16ps, SchedWriteFMA,
                                       avx512vl_f32_info, avx512vl_bf16_info,
                                       HasBF16>, T8XS, EVEX_CD8<32, CD8VF>;

//===----------------------------------------------------------------------===//
// AVX512FP16
//===----------------------------------------------------------------------===//

let Predicates = [HasFP16] in {
// Move word (r/m16) to Packed word
def VMOVW2SHrr : AVX512<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
                        "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, Sched<[WriteVecMoveFromGpr]>;
def VMOVWrm : AVX512<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i16mem:$src),
                     "vmovw\t{$src, $dst|$dst, $src}",
                     [(set VR128X:$dst,
                           (v8i16 (scalar_to_vector (loadi16 addr:$src))))]>,
                     T_MAP5PD, EVEX, EVEX_CD8<16, CD8VT1>, Sched<[WriteFLoad]>;

def : Pat<(f16 (bitconvert GR16:$src)),
          (f16 (COPY_TO_REGCLASS
                (VMOVW2SHrr
                 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)),
                FR16X))>;
def : Pat<(v8i16 (scalar_to_vector (i16 GR16:$src))),
          (VMOVW2SHrr (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit))>;
def : Pat<(v4i32 (X86vzmovl (scalar_to_vector (and GR32:$src, 0xffff)))),
          (VMOVW2SHrr GR32:$src)>;
// FIXME: We should really find a way to improve these patterns.
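// (These X86vzmovl patterns exist because vmovw already zeroes everything
// above the inserted word, so inserting into an explicitly zeroed wider
// vector can be done with the plain VMOVW2SHrr/VMOVWrm instructions.)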

//===----------------------------------------------------------------------===//
// AVX512FP16
//===----------------------------------------------------------------------===//

let Predicates = [HasFP16] in {
// Move word (r/m16) to packed word
def VMOVW2SHrr : AVX512<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
                        "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, Sched<[WriteVecMoveFromGpr]>;
def VMOVWrm : AVX512<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i16mem:$src),
                     "vmovw\t{$src, $dst|$dst, $src}",
                     [(set VR128X:$dst,
                       (v8i16 (scalar_to_vector (loadi16 addr:$src))))]>,
                     T_MAP5PD, EVEX, EVEX_CD8<16, CD8VT1>, Sched<[WriteFLoad]>;

def : Pat<(f16 (bitconvert GR16:$src)),
          (f16 (COPY_TO_REGCLASS
                (VMOVW2SHrr
                 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)),
                FR16X))>;
def : Pat<(v8i16 (scalar_to_vector (i16 GR16:$src))),
          (VMOVW2SHrr (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit))>;
def : Pat<(v4i32 (X86vzmovl (scalar_to_vector (and GR32:$src, 0xffff)))),
          (VMOVW2SHrr GR32:$src)>;
// FIXME: We should really find a way to improve these patterns.
def : Pat<(v8i32 (X86vzmovl
                  (insert_subvector undef,
                                    (v4i32 (scalar_to_vector
                                            (and GR32:$src, 0xffff))),
                                    (iPTR 0)))),
          (SUBREG_TO_REG (i32 0), (VMOVW2SHrr GR32:$src), sub_xmm)>;
def : Pat<(v16i32 (X86vzmovl
                   (insert_subvector undef,
                                     (v4i32 (scalar_to_vector
                                             (and GR32:$src, 0xffff))),
                                     (iPTR 0)))),
          (SUBREG_TO_REG (i32 0), (VMOVW2SHrr GR32:$src), sub_xmm)>;

def : Pat<(v8i16 (X86vzmovl (scalar_to_vector (i16 GR16:$src)))),
          (VMOVW2SHrr (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit))>;

// The AVX 128-bit vmovw instruction writes zeros in the high part of the
// destination register.
def : Pat<(v8i16 (X86vzload16 addr:$src)),
          (VMOVWrm addr:$src)>;
def : Pat<(v16i16 (X86vzload16 addr:$src)),
          (SUBREG_TO_REG (i32 0), (v8i16 (VMOVWrm addr:$src)), sub_xmm)>;

// Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
def : Pat<(v32i16 (X86vzload16 addr:$src)),
          (SUBREG_TO_REG (i32 0), (v8i16 (VMOVWrm addr:$src)), sub_xmm)>;

def : Pat<(v4i32 (scalar_to_vector (i32 (extloadi16 addr:$src)))),
          (VMOVWrm addr:$src)>;
def : Pat<(v4i32 (X86vzmovl (scalar_to_vector (i32 (zextloadi16 addr:$src))))),
          (VMOVWrm addr:$src)>;
def : Pat<(v8i32 (X86vzmovl
                  (insert_subvector undef,
                                    (v4i32 (scalar_to_vector
                                            (i32 (zextloadi16 addr:$src)))),
                                    (iPTR 0)))),
          (SUBREG_TO_REG (i32 0), (VMOVWrm addr:$src), sub_xmm)>;
def : Pat<(v16i32 (X86vzmovl
                   (insert_subvector undef,
                                     (v4i32 (scalar_to_vector
                                             (i32 (zextloadi16 addr:$src)))),
                                     (iPTR 0)))),
          (SUBREG_TO_REG (i32 0), (VMOVWrm addr:$src), sub_xmm)>;

// Move word from xmm register to r/m16
def VMOVSH2Wrr : AVX512<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
                        "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, Sched<[WriteVecMoveToGpr]>;
def VMOVWmr : AVX512<0x7E, MRMDestMem, (outs),
                     (ins i16mem:$dst, VR128X:$src),
                     "vmovw\t{$src, $dst|$dst, $src}",
                     [(store (i16 (extractelt (v8i16 VR128X:$src),
                                              (iPTR 0))), addr:$dst)]>,
                     T_MAP5PD, EVEX, EVEX_CD8<16, CD8VT1>, Sched<[WriteFStore]>;

def : Pat<(i16 (bitconvert FR16X:$src)),
          (i16 (EXTRACT_SUBREG
                (VMOVSH2Wrr (COPY_TO_REGCLASS FR16X:$src, VR128X)),
                sub_16bit))>;
def : Pat<(i16 (extractelt (v8i16 VR128X:$src), (iPTR 0))),
          (i16 (EXTRACT_SUBREG (VMOVSH2Wrr VR128X:$src), sub_16bit))>;

// Allow "vmovw" to use GR64
let hasSideEffects = 0 in {
  def VMOVW64toSHrr : AVX512<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
                             "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
  def VMOVSHtoW64rr : AVX512<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
                             "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, VEX_W, Sched<[WriteVecMoveToGpr]>;
}
}
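
// Note that VMOVW64toSHrr/VMOVSHtoW64rr still move only 16 bits of data; the
// GR64 forms carry no patterns (and hasSideEffects = 0) and exist so that
// assembly written with 64-bit GPR operands, e.g. (illustrative)
//   vmovw %rax, %xmm0
// can be accepted and encoded with VEX_W.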

// Convert 16-bit float to i16/u16
multiclass avx512_cvtph2w<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                          SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
                          AVX512VLVectorVTInfo _Dst,
                          AVX512VLVectorVTInfo _Src,
                          X86SchedWriteWidths sched> {
  let Predicates = [HasFP16] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info512, _Src.info512,
                            OpNode, MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, _Dst.info512, _Src.info512,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasFP16, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info128, _Src.info128,
                               OpNode, MaskOpNode, sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info256, _Src.info256,
                               OpNode, MaskOpNode, sched.YMM>, EVEX_V256;
  }
}

// Convert 16-bit float to i16/u16 with truncation
multiclass avx512_cvttph2w<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                           SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
                           AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasFP16] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info512, _Src.info512,
                            OpNode, MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, _Dst.info512, _Src.info512,
                                OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasFP16, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info128, _Src.info128,
                               OpNode, MaskOpNode, sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info256, _Src.info256,
                               OpNode, MaskOpNode, sched.YMM>, EVEX_V256;
  }
}

defm VCVTPH2UW : avx512_cvtph2w<0x7D, "vcvtph2uw", X86cvtp2UInt, X86cvtp2UInt,
                                X86cvtp2UIntRnd, avx512vl_i16_info,
                                avx512vl_f16_info, SchedWriteCvtPD2DQ>,
                                T_MAP5PS, EVEX_CD8<16, CD8VF>;
defm VCVTUW2PH : avx512_cvtph2w<0x7D, "vcvtuw2ph", any_uint_to_fp, uint_to_fp,
                                X86VUintToFpRnd, avx512vl_f16_info,
                                avx512vl_i16_info, SchedWriteCvtPD2DQ>,
                                T_MAP5XD, EVEX_CD8<16, CD8VF>;
defm VCVTTPH2W : avx512_cvttph2w<0x7C, "vcvttph2w", X86any_cvttp2si,
                                 X86cvttp2si, X86cvttp2siSAE,
                                 avx512vl_i16_info, avx512vl_f16_info,
                                 SchedWriteCvtPD2DQ>, T_MAP5PD, EVEX_CD8<16, CD8VF>;
defm VCVTTPH2UW : avx512_cvttph2w<0x7C, "vcvttph2uw", X86any_cvttp2ui,
                                  X86cvttp2ui, X86cvttp2uiSAE,
                                  avx512vl_i16_info, avx512vl_f16_info,
                                  SchedWriteCvtPD2DQ>, T_MAP5PS, EVEX_CD8<16, CD8VF>;
defm VCVTPH2W : avx512_cvtph2w<0x7D, "vcvtph2w", X86cvtp2Int, X86cvtp2Int,
                               X86cvtp2IntRnd, avx512vl_i16_info,
                               avx512vl_f16_info, SchedWriteCvtPD2DQ>,
                               T_MAP5PD, EVEX_CD8<16, CD8VF>;
defm VCVTW2PH : avx512_cvtph2w<0x7D, "vcvtw2ph", any_sint_to_fp, sint_to_fp,
                               X86VSintToFpRnd, avx512vl_f16_info,
                               avx512vl_i16_info, SchedWriteCvtPD2DQ>,
                               T_MAP5XS, EVEX_CD8<16, CD8VF>;
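
// All four non-truncating forms above share opcode 0x7D and are distinguished
// only by prefix/map: vcvtph2uw (T_MAP5PS), vcvtph2w (T_MAP5PD), vcvtuw2ph
// (T_MAP5XD) and vcvtw2ph (T_MAP5XS). The ZMM variants additionally accept
// embedded rounding, e.g. (illustrative, AT&T syntax):
//   vcvtph2w {rz-sae}, %zmm1, %zmm0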

// Convert Half to Signed/Unsigned Doubleword
multiclass avx512_cvtph2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                           SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasFP16] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f16x_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f16x_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasFP16, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v8f16x_info, OpNode,
                               MaskOpNode, sched.XMM, "{1to4}", "", f64mem>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f16x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }
}

// Convert Half to Signed/Unsigned Doubleword with truncation
multiclass avx512_cvttph2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                            SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
                            X86SchedWriteWidths sched> {
  let Predicates = [HasFP16] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f16x_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f16x_info,
                                OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasFP16, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v8f16x_info, OpNode,
                               MaskOpNode, sched.XMM, "{1to4}", "", f64mem>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f16x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }
}

defm VCVTPH2DQ : avx512_cvtph2dq<0x5B, "vcvtph2dq", X86cvtp2Int, X86cvtp2Int,
                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, T_MAP5PD,
                                 EVEX_CD8<16, CD8VH>;
defm VCVTPH2UDQ : avx512_cvtph2dq<0x79, "vcvtph2udq", X86cvtp2UInt, X86cvtp2UInt,
                                  X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, T_MAP5PS,
                                  EVEX_CD8<16, CD8VH>;

defm VCVTTPH2DQ : avx512_cvttph2dq<0x5B, "vcvttph2dq", X86any_cvttp2si,
                                   X86cvttp2si, X86cvttp2siSAE,
                                   SchedWriteCvtPS2DQ>, T_MAP5XS,
                                   EVEX_CD8<16, CD8VH>;

defm VCVTTPH2UDQ : avx512_cvttph2dq<0x78, "vcvttph2udq", X86any_cvttp2ui,
                                    X86cvttp2ui, X86cvttp2uiSAE,
                                    SchedWriteCvtPS2DQ>, T_MAP5PS,
                                    EVEX_CD8<16, CD8VH>;
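
// These conversions double the element width, so the 128-bit forms read only
// the low four f16 elements of the v8f16x_info source: the full memory
// operand is f64mem (4 x f16) and the broadcast string must be spelled out as
// "{1to4}" because the default derived from the source would be "{1to8}".
// Illustrative AT&T-syntax broadcast use:
//   vcvtph2dq (%rax){1to4}, %xmm0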

// Convert Half to Signed/Unsigned Quadword
multiclass avx512_cvtph2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                           SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasFP16] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f16x_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f16x_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasFP16, HasVLX] in {
    // Explicitly specified broadcast string, since we take only 2 elements
    // from the v8f16x_info source.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v8f16x_info, OpNode,
                               MaskOpNode, sched.XMM, "{1to2}", "", f32mem>,
                               EVEX_V128;
    // Explicitly specified broadcast string, since we take only 4 elements
    // from the v8f16x_info source.
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v8f16x_info, OpNode,
                               MaskOpNode, sched.YMM, "{1to4}", "", f64mem>,
                               EVEX_V256;
  }
}

// Convert Half to Signed/Unsigned Quadword with truncation
multiclass avx512_cvttph2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                            SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
                            X86SchedWriteWidths sched> {
  let Predicates = [HasFP16] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f16x_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f16x_info,
                                OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasFP16, HasVLX] in {
    // Explicitly specified broadcast string, since we take only 2 elements
    // from the v8f16x_info source.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v8f16x_info, OpNode,
                               MaskOpNode, sched.XMM, "{1to2}", "", f32mem>, EVEX_V128;
    // Explicitly specified broadcast string, since we take only 4 elements
    // from the v8f16x_info source.
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v8f16x_info, OpNode,
                               MaskOpNode, sched.YMM, "{1to4}", "", f64mem>, EVEX_V256;
  }
}

defm VCVTPH2QQ : avx512_cvtph2qq<0x7B, "vcvtph2qq", X86cvtp2Int, X86cvtp2Int,
                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, T_MAP5PD,
                                 EVEX_CD8<16, CD8VQ>;

defm VCVTPH2UQQ : avx512_cvtph2qq<0x79, "vcvtph2uqq", X86cvtp2UInt, X86cvtp2UInt,
                                  X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, T_MAP5PD,
                                  EVEX_CD8<16, CD8VQ>;

defm VCVTTPH2QQ : avx512_cvttph2qq<0x7A, "vcvttph2qq", X86any_cvttp2si,
                                   X86cvttp2si, X86cvttp2siSAE,
                                   SchedWriteCvtPS2DQ>, T_MAP5PD,
                                   EVEX_CD8<16, CD8VQ>;

defm VCVTTPH2UQQ : avx512_cvttph2qq<0x78, "vcvttph2uqq", X86any_cvttp2ui,
                                    X86cvttp2ui, X86cvttp2uiSAE,
                                    SchedWriteCvtPS2DQ>, T_MAP5PD,
                                    EVEX_CD8<16, CD8VQ>;
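
// Same idea with a 4x width increase: the Z128 form converts only the low two
// f16 elements (f32mem, 32 bits of memory) and the Z256 form the low four
// (f64mem), hence the explicit "{1to2}"/"{1to4}" strings above. Illustrative
// AT&T-syntax broadcast use:
//   vcvtph2qq (%rax){1to2}, %xmm0 {%k1} {z}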

// Convert Signed/Unsigned Quadword to Half
multiclass avx512_cvtqq2ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                           SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  // We need "x"/"y"/"z" suffixes in order to distinguish between the 128-,
  // 256- and 512-bit memory forms of these instructions in the Asm Parser.
  // They all have the same destination type - 'v8f16x_info'. We also specify
  // the broadcast string explicitly for the same reason.
  let Predicates = [HasFP16] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v8i64_info, OpNode,
                            MaskOpNode, sched.ZMM, "{1to8}", "{z}">,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f16x_info, v8i64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasFP16, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v2i64x_info,
                               null_frag, null_frag, sched.XMM, "{1to2}", "{x}",
                               i128mem, VK2WM>,
                               EVEX_V128, NotEVEX2VEXConvertible;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v4i64x_info,
                               null_frag, null_frag, sched.YMM, "{1to4}", "{y}",
                               i256mem, VK4WM>,
                               EVEX_V256, NotEVEX2VEXConvertible;
  }

  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
                  VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
                  i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
                  VK2WM:$mask, i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                  VK2WM:$mask, i64mem:$src), 0, "att">;

  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
                  VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
                  "$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
                  i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
                  VK4WM:$mask, i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
                  VK4WM:$mask, i64mem:$src), 0, "att">;

  def : InstAlias<OpcodeStr#"z\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Zrr") VR128X:$dst,
                  VR512:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}}|"
                  "$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Zrrk") VR128X:$dst,
                  VK8WM:$mask, VR512:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Zrrkz") VR128X:$dst,
                  VK8WM:$mask, VR512:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst|$dst, ${src}{1to8}}",
                  (!cast<Instruction>(NAME # "Zrmb") VR128X:$dst,
                  i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to8}}",
                  (!cast<Instruction>(NAME # "Zrmbk") VR128X:$dst,
                  VK8WM:$mask, i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to8}}",
                  (!cast<Instruction>(NAME # "Zrmbkz") VR128X:$dst,
                  VK8WM:$mask, i64mem:$src), 0, "att">;
}
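
// With the aliases above, AT&T assembly can select the source width that the
// bare mnemonic leaves ambiguous, e.g. for VCVTQQ2PH below (illustrative):
//   vcvtqq2phx (%rax), %xmm0        # 128-bit source
//   vcvtqq2phy (%rax), %xmm0        # 256-bit source
//   vcvtqq2phz (%rax), %xmm0        # 512-bit source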

defm VCVTQQ2PH : avx512_cvtqq2ph<0x5B, "vcvtqq2ph", any_sint_to_fp, sint_to_fp,
                                 X86VSintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, T_MAP5PS,
                                 EVEX_CD8<64, CD8VF>;

defm VCVTUQQ2PH : avx512_cvtqq2ph<0x7A, "vcvtuqq2ph", any_uint_to_fp, uint_to_fp,
                                  X86VUintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, T_MAP5XD,
                                  EVEX_CD8<64, CD8VF>;

// Convert half to signed/unsigned int 32/64
defm VCVTSH2SIZ: avx512_cvt_s_int_round<0x2D, f16x_info, i32x_info, X86cvts2si,
                                        X86cvts2siRnd, WriteCvtSS2I, "cvtsh2si", "{l}", HasFP16>,
                                        T_MAP5XS, EVEX_CD8<16, CD8VT1>;
defm VCVTSH2SI64Z: avx512_cvt_s_int_round<0x2D, f16x_info, i64x_info, X86cvts2si,
                                          X86cvts2siRnd, WriteCvtSS2I, "cvtsh2si", "{q}", HasFP16>,
                                          T_MAP5XS, VEX_W, EVEX_CD8<16, CD8VT1>;
defm VCVTSH2USIZ: avx512_cvt_s_int_round<0x79, f16x_info, i32x_info, X86cvts2usi,
                                         X86cvts2usiRnd, WriteCvtSS2I, "cvtsh2usi", "{l}", HasFP16>,
                                         T_MAP5XS, EVEX_CD8<16, CD8VT1>;
defm VCVTSH2USI64Z: avx512_cvt_s_int_round<0x79, f16x_info, i64x_info, X86cvts2usi,
                                           X86cvts2usiRnd, WriteCvtSS2I, "cvtsh2usi", "{q}", HasFP16>,
                                           T_MAP5XS, VEX_W, EVEX_CD8<16, CD8VT1>;

defm VCVTTSH2SIZ: avx512_cvt_s_all<0x2C, "vcvttsh2si", f16x_info, i32x_info,
                                   any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
                                   "{l}", HasFP16>, T_MAP5XS, EVEX_CD8<16, CD8VT1>;
defm VCVTTSH2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsh2si", f16x_info, i64x_info,
                                     any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
                                     "{q}", HasFP16>, VEX_W, T_MAP5XS, EVEX_CD8<16, CD8VT1>;
defm VCVTTSH2USIZ: avx512_cvt_s_all<0x78, "vcvttsh2usi", f16x_info, i32x_info,
                                    any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
                                    "{l}", HasFP16>, T_MAP5XS, EVEX_CD8<16, CD8VT1>;
defm VCVTTSH2USI64Z: avx512_cvt_s_all<0x78, "vcvttsh2usi", f16x_info, i64x_info,
                                      any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
                                      "{q}", HasFP16>, T_MAP5XS, VEX_W, EVEX_CD8<16, CD8VT1>;
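
// vcvtsh2si/vcvtsh2usi round according to the current (or embedded) rounding
// mode, while the vcvttsh2si/vcvttsh2usi forms always truncate toward zero
// and take {sae} instead of a rounding control, e.g. (illustrative, AT&T
// syntax):
//   vcvtsh2si %xmm0, %eax
//   vcvttsh2si {sae}, %xmm0, %rax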

let Predicates = [HasFP16] in {
  defm VCVTSI2SHZ  : avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd, WriteCvtI2SS, GR32,
                                          v8f16x_info, i32mem, loadi32, "cvtsi2sh", "l">,
                                          T_MAP5XS, EVEX_CD8<32, CD8VT1>;
  defm VCVTSI642SHZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd, WriteCvtI2SS, GR64,
                                          v8f16x_info, i64mem, loadi64, "cvtsi2sh", "q">,
                                          T_MAP5XS, VEX_W, EVEX_CD8<64, CD8VT1>;
  defm VCVTUSI2SHZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd, WriteCvtI2SS, GR32,
                                          v8f16x_info, i32mem, loadi32,
                                          "cvtusi2sh", "l">, T_MAP5XS, EVEX_CD8<32, CD8VT1>;
  defm VCVTUSI642SHZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd, WriteCvtI2SS, GR64,
                                            v8f16x_info, i64mem, loadi64, "cvtusi2sh", "q">,
                                            T_MAP5XS, VEX_W, EVEX_CD8<64, CD8VT1>;
  def : InstAlias<"vcvtsi2sh\t{$src, $src1, $dst|$dst, $src1, $src}",
                  (VCVTSI2SHZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;

  def : InstAlias<"vcvtusi2sh\t{$src, $src1, $dst|$dst, $src1, $src}",
                  (VCVTUSI2SHZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;

  def : Pat<(f16 (any_sint_to_fp (loadi32 addr:$src))),
            (VCVTSI2SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
  def : Pat<(f16 (any_sint_to_fp (loadi64 addr:$src))),
            (VCVTSI642SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;

  def : Pat<(f16 (any_sint_to_fp GR32:$src)),
            (VCVTSI2SHZrr (f16 (IMPLICIT_DEF)), GR32:$src)>;
  def : Pat<(f16 (any_sint_to_fp GR64:$src)),
            (VCVTSI642SHZrr (f16 (IMPLICIT_DEF)), GR64:$src)>;

  def : Pat<(f16 (any_uint_to_fp (loadi32 addr:$src))),
            (VCVTUSI2SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
  def : Pat<(f16 (any_uint_to_fp (loadi64 addr:$src))),
            (VCVTUSI642SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;

  def : Pat<(f16 (any_uint_to_fp GR32:$src)),
            (VCVTUSI2SHZrr (f16 (IMPLICIT_DEF)), GR32:$src)>;
  def : Pat<(f16 (any_uint_to_fp GR64:$src)),
            (VCVTUSI642SHZrr (f16 (IMPLICIT_DEF)), GR64:$src)>;

  // Patterns used for matching vcvtsi2sh intrinsic sequences from clang
  // that would otherwise produce unnecessary vmovsh instructions.
  def : Pat<(v8f16 (X86Movsh
                    (v8f16 VR128X:$dst),
                    (v8f16 (scalar_to_vector (f16 (any_sint_to_fp GR64:$src)))))),
            (VCVTSI642SHZrr_Int VR128X:$dst, GR64:$src)>;

  def : Pat<(v8f16 (X86Movsh
                    (v8f16 VR128X:$dst),
                    (v8f16 (scalar_to_vector (f16 (any_sint_to_fp (loadi64 addr:$src))))))),
            (VCVTSI642SHZrm_Int VR128X:$dst, addr:$src)>;

  def : Pat<(v8f16 (X86Movsh
                    (v8f16 VR128X:$dst),
                    (v8f16 (scalar_to_vector (f16 (any_sint_to_fp GR32:$src)))))),
            (VCVTSI2SHZrr_Int VR128X:$dst, GR32:$src)>;

  def : Pat<(v8f16 (X86Movsh
                    (v8f16 VR128X:$dst),
                    (v8f16 (scalar_to_vector (f16 (any_sint_to_fp (loadi32 addr:$src))))))),
            (VCVTSI2SHZrm_Int VR128X:$dst, addr:$src)>;

  def : Pat<(v8f16 (X86Movsh
                    (v8f16 VR128X:$dst),
                    (v8f16 (scalar_to_vector (f16 (any_uint_to_fp GR64:$src)))))),
            (VCVTUSI642SHZrr_Int VR128X:$dst, GR64:$src)>;

  def : Pat<(v8f16 (X86Movsh
                    (v8f16 VR128X:$dst),
                    (v8f16 (scalar_to_vector (f16 (any_uint_to_fp (loadi64 addr:$src))))))),
            (VCVTUSI642SHZrm_Int VR128X:$dst, addr:$src)>;

  def : Pat<(v8f16 (X86Movsh
                    (v8f16 VR128X:$dst),
                    (v8f16 (scalar_to_vector (f16 (any_uint_to_fp GR32:$src)))))),
            (VCVTUSI2SHZrr_Int VR128X:$dst, GR32:$src)>;

  def : Pat<(v8f16 (X86Movsh
                    (v8f16 VR128X:$dst),
                    (v8f16 (scalar_to_vector (f16 (any_uint_to_fp (loadi32 addr:$src))))))),
            (VCVTUSI2SHZrm_Int VR128X:$dst, addr:$src)>;
} // Predicates = [HasFP16]
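
// The _Int patterns above match the whole vmovsh+convert DAG, i.e.
// (X86Movsh $dst, (scalar_to_vector (f16 (sint_to_fp $src)))), and select a
// single "_Int" conversion that writes the result straight into the low
// element of $dst, so the separate vmovsh disappears from the final code.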

let Predicates = [HasFP16, HasVLX] in {
  // Special patterns to allow use of X86VMSintToFP for masking. Instruction
  // patterns have been disabled with null_frag.
  def : Pat<(v8f16 (X86any_VSintToFP (v4i64 VR256X:$src))),
            (VCVTQQ2PHZ256rr VR256X:$src)>;
  def : Pat<(X86VMSintToFP (v4i64 VR256X:$src), (v8f16 VR128X:$src0),
                           VK4WM:$mask),
            (VCVTQQ2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
  def : Pat<(X86VMSintToFP (v4i64 VR256X:$src), v8f16x_info.ImmAllZerosV,
                           VK4WM:$mask),
            (VCVTQQ2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;

  def : Pat<(v8f16 (X86any_VSintToFP (loadv4i64 addr:$src))),
            (VCVTQQ2PHZ256rm addr:$src)>;
  def : Pat<(X86VMSintToFP (loadv4i64 addr:$src), (v8f16 VR128X:$src0),
                           VK4WM:$mask),
            (VCVTQQ2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86VMSintToFP (loadv4i64 addr:$src), v8f16x_info.ImmAllZerosV,
                           VK4WM:$mask),
            (VCVTQQ2PHZ256rmkz VK4WM:$mask, addr:$src)>;

  def : Pat<(v8f16 (X86any_VSintToFP (v4i64 (X86VBroadcastld64 addr:$src)))),
            (VCVTQQ2PHZ256rmb addr:$src)>;
  def : Pat<(X86VMSintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
                           (v8f16 VR128X:$src0), VK4WM:$mask),
            (VCVTQQ2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86VMSintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
                           v8f16x_info.ImmAllZerosV, VK4WM:$mask),
            (VCVTQQ2PHZ256rmbkz VK4WM:$mask, addr:$src)>;

  def : Pat<(v8f16 (X86any_VSintToFP (v2i64 VR128X:$src))),
            (VCVTQQ2PHZ128rr VR128X:$src)>;
  def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), (v8f16 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTQQ2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), v8f16x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTQQ2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;

  def : Pat<(v8f16 (X86any_VSintToFP (loadv2i64 addr:$src))),
            (VCVTQQ2PHZ128rm addr:$src)>;
  def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), (v8f16 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTQQ2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), v8f16x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTQQ2PHZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v8f16 (X86any_VSintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
            (VCVTQQ2PHZ128rmb addr:$src)>;
  def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
                           (v8f16 VR128X:$src0), VK2WM:$mask),
            (VCVTQQ2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
                           v8f16x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTQQ2PHZ128rmbkz VK2WM:$mask, addr:$src)>;

  // Special patterns to allow use of X86VMUintToFP for masking. Instruction
  // patterns have been disabled with null_frag.
  def : Pat<(v8f16 (X86any_VUintToFP (v4i64 VR256X:$src))),
            (VCVTUQQ2PHZ256rr VR256X:$src)>;
  def : Pat<(X86VMUintToFP (v4i64 VR256X:$src), (v8f16 VR128X:$src0),
                           VK4WM:$mask),
            (VCVTUQQ2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
  def : Pat<(X86VMUintToFP (v4i64 VR256X:$src), v8f16x_info.ImmAllZerosV,
                           VK4WM:$mask),
            (VCVTUQQ2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;

  def : Pat<(v8f16 (X86any_VUintToFP (loadv4i64 addr:$src))),
            (VCVTUQQ2PHZ256rm addr:$src)>;
  def : Pat<(X86VMUintToFP (loadv4i64 addr:$src), (v8f16 VR128X:$src0),
                           VK4WM:$mask),
            (VCVTUQQ2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86VMUintToFP (loadv4i64 addr:$src), v8f16x_info.ImmAllZerosV,
                           VK4WM:$mask),
            (VCVTUQQ2PHZ256rmkz VK4WM:$mask, addr:$src)>;

  def : Pat<(v8f16 (X86any_VUintToFP (v4i64 (X86VBroadcastld64 addr:$src)))),
            (VCVTUQQ2PHZ256rmb addr:$src)>;
  def : Pat<(X86VMUintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
                           (v8f16 VR128X:$src0), VK4WM:$mask),
            (VCVTUQQ2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86VMUintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
                           v8f16x_info.ImmAllZerosV, VK4WM:$mask),
            (VCVTUQQ2PHZ256rmbkz VK4WM:$mask, addr:$src)>;

  def : Pat<(v8f16 (X86any_VUintToFP (v2i64 VR128X:$src))),
            (VCVTUQQ2PHZ128rr VR128X:$src)>;
  def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), (v8f16 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTUQQ2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), v8f16x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTUQQ2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;

  def : Pat<(v8f16 (X86any_VUintToFP (loadv2i64 addr:$src))),
            (VCVTUQQ2PHZ128rm addr:$src)>;
  def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), (v8f16 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTUQQ2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), v8f16x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTUQQ2PHZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v8f16 (X86any_VUintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
            (VCVTUQQ2PHZ128rmb addr:$src)>;
  def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
                           (v8f16 VR128X:$src0), VK2WM:$mask),
            (VCVTUQQ2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
                           v8f16x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTUQQ2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
}
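
// The "@earlyclobber $dst, $src1 = $dst" constraint below ties the
// accumulator $src1 to $dst while keeping the register allocator from giving
// $src2 or $src3 the same register as the destination, which the complex
// multiply-add instructions do not permit.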

let Constraints = "@earlyclobber $dst, $src1 = $dst" in {
  multiclass avx512_cfmaop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _, bit IsCommutable> {
    defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
            (ins _.RC:$src2, _.RC:$src3),
            OpcodeStr, "$src3, $src2", "$src2, $src3",
            (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), IsCommutable>, EVEX_4V;

    defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
            (ins _.RC:$src2, _.MemOp:$src3),
            OpcodeStr, "$src3, $src2", "$src2, $src3",
            (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>, EVEX_4V;

    defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
             (ins _.RC:$src2, _.ScalarMemOp:$src3),
             OpcodeStr, !strconcat("${src3}", _.BroadcastStr, ", $src2"),
             !strconcat("$src2, ${src3}", _.BroadcastStr),
             (_.VT (OpNode _.RC:$src2, (_.VT (_.BroadcastLdFrag addr:$src3)), _.RC:$src1))>, EVEX_B, EVEX_4V;
  }
} // Constraints = "@earlyclobber $dst, $src1 = $dst"

multiclass avx512_cfmaop_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86VectorVTInfo _> {
  let Constraints = "@earlyclobber $dst, $src1 = $dst" in
  defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
           (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc)))>,
           EVEX_4V, EVEX_B, EVEX_RC;
}

multiclass avx512_cfmaop_common<bits<8> opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd, bit IsCommutable> {
  let Predicates = [HasFP16] in {
    defm Z : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v16f32_info, IsCommutable>,
             avx512_cfmaop_round<opc, OpcodeStr, OpNodeRnd, v16f32_info>,
             EVEX_V512, Sched<[WriteFMAZ]>;
  }
  let Predicates = [HasVLX, HasFP16] in {
    defm Z256 : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v8f32x_info, IsCommutable>, EVEX_V256, Sched<[WriteFMAY]>;
    defm Z128 : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v4f32x_info, IsCommutable>, EVEX_V128, Sched<[WriteFMAX]>;
  }
}

multiclass avx512_cfmulop_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 SDNode MaskOpNode, SDNode OpNodeRnd, bit IsCommutable> {
  let Predicates = [HasFP16] in {
    defm Z : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f32_info,
                              WriteFMAZ, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>,
             avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, WriteFMAZ, v16f32_info,
                                    "", "@earlyclobber $dst">, EVEX_V512;
  }
  let Predicates = [HasVLX, HasFP16] in {
    defm Z256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f32x_info,
                                 WriteFMAY, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>, EVEX_V256;
    defm Z128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f32x_info,
                                 WriteFMAX, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>, EVEX_V128;
  }
}

let Uses = [MXCSR] in {
  defm VFMADDCPH  : avx512_cfmaop_common<0x56, "vfmaddcph", x86vfmaddc, x86vfmaddcRnd, 1>,
                                         T_MAP6XS, EVEX_CD8<32, CD8VF>;
  defm VFCMADDCPH : avx512_cfmaop_common<0x56, "vfcmaddcph", x86vfcmaddc, x86vfcmaddcRnd, 0>,
                                         T_MAP6XD, EVEX_CD8<32, CD8VF>;

  defm VFMULCPH  : avx512_cfmulop_common<0xD6, "vfmulcph", x86vfmulc, x86vfmulc,
                                         x86vfmulcRnd, 1>, T_MAP6XS, EVEX_CD8<32, CD8VF>;
  defm VFCMULCPH : avx512_cfmulop_common<0xD6, "vfcmulcph", x86vfcmulc,
                                         x86vfcmulc, x86vfcmulcRnd, 0>, T_MAP6XD, EVEX_CD8<32, CD8VF>;
}
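
// The complex instructions treat each even/odd pair of f16 elements as one
// complex number (real part in the even element); the f32 info classes are
// used because one f32 lane spans exactly one such pair. Per complex lane the
// product follows (a+bi)*(c+di) = (ac-bd) + (ad+bc)i, and the vfcmulcph /
// vfcmaddcph variants multiply by the conjugate of one source instead.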

multiclass avx512_cfmaop_sh_common<bits<8> opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd,
                                   bit IsCommutable> {
  let Predicates = [HasFP16], Constraints = "@earlyclobber $dst, $src1 = $dst" in {
    defm r : AVX512_maskable_3src<opc, MRMSrcReg, v4f32x_info, (outs VR128X:$dst),
                                  (ins VR128X:$src2, VR128X:$src3), OpcodeStr,
                                  "$src3, $src2", "$src2, $src3",
                                  (v4f32 (OpNode VR128X:$src2, VR128X:$src3, VR128X:$src1)), IsCommutable>,
                                  Sched<[WriteFMAX]>;
    defm m : AVX512_maskable_3src<opc, MRMSrcMem, v4f32x_info, (outs VR128X:$dst),
                                  (ins VR128X:$src2, ssmem:$src3), OpcodeStr,
                                  "$src3, $src2", "$src2, $src3",
                                  (v4f32 (OpNode VR128X:$src2, (sse_load_f32 addr:$src3), VR128X:$src1))>,
                                  Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
    defm rb : AVX512_maskable_3src<opc, MRMSrcReg, v4f32x_info, (outs VR128X:$dst),
                                   (ins VR128X:$src2, VR128X:$src3, AVX512RC:$rc), OpcodeStr,
                                   "$rc, $src3, $src2", "$src2, $src3, $rc",
                                   (v4f32 (OpNodeRnd VR128X:$src2, VR128X:$src3, VR128X:$src1, (i32 timm:$rc)))>,
                                   EVEX_B, EVEX_RC, Sched<[WriteFMAX]>;
  }
}

multiclass avx512_cfmbinop_sh_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                     SDNode OpNodeRnd, bit IsCommutable> {
  let Predicates = [HasFP16] in {
    defm rr : AVX512_maskable<opc, MRMSrcReg, f32x_info, (outs VR128X:$dst),
                              (ins VR128X:$src1, VR128X:$src2), OpcodeStr,
                              "$src2, $src1", "$src1, $src2",
                              (v4f32 (OpNode VR128X:$src1, VR128X:$src2)),
                              IsCommutable, IsCommutable, IsCommutable,
                              X86selects, "@earlyclobber $dst">, Sched<[WriteFMAX]>;
    defm rm : AVX512_maskable<opc, MRMSrcMem, f32x_info, (outs VR128X:$dst),
                              (ins VR128X:$src1, ssmem:$src2), OpcodeStr,
                              "$src2, $src1", "$src1, $src2",
                              (v4f32 (OpNode VR128X:$src1, (sse_load_f32 addr:$src2))),
                              0, 0, 0, X86selects, "@earlyclobber $dst">,
                              Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
    defm rrb : AVX512_maskable<opc, MRMSrcReg, f32x_info, (outs VR128X:$dst),
                               (ins VR128X:$src1, VR128X:$src2, AVX512RC:$rc), OpcodeStr,
                               "$rc, $src2, $src1", "$src1, $src2, $rc",
                               (OpNodeRnd (v4f32 VR128X:$src1), (v4f32 VR128X:$src2), (i32 timm:$rc)),
                               0, 0, 0, X86selects, "@earlyclobber $dst">,
                               EVEX_B, EVEX_RC, Sched<[WriteFMAX]>;
  }
}

let Uses = [MXCSR] in {
  defm VFMADDCSHZ  : avx512_cfmaop_sh_common<0x57, "vfmaddcsh", x86vfmaddcSh, x86vfmaddcShRnd, 1>,
                                             T_MAP6XS, EVEX_CD8<32, CD8VT1>, EVEX_V128, EVEX_4V;
  defm VFCMADDCSHZ : avx512_cfmaop_sh_common<0x57, "vfcmaddcsh", x86vfcmaddcSh, x86vfcmaddcShRnd, 0>,
                                             T_MAP6XD, EVEX_CD8<32, CD8VT1>, EVEX_V128, EVEX_4V;

  defm VFMULCSHZ  : avx512_cfmbinop_sh_common<0xD7, "vfmulcsh", x86vfmulcSh, x86vfmulcShRnd, 1>,
                                              T_MAP6XS, EVEX_CD8<32, CD8VT1>, EVEX_V128, VEX_LIG, EVEX_4V;
  defm VFCMULCSHZ : avx512_cfmbinop_sh_common<0xD7, "vfcmulcsh", x86vfcmulcSh, x86vfcmulcShRnd, 0>,
                                              T_MAP6XD, EVEX_CD8<32, CD8VT1>, EVEX_V128, VEX_LIG, EVEX_4V;
}
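
// The scalar complex forms reuse v4f32/f32 types, ssmem/sse_load_f32 and
// EVEX_CD8<32, CD8VT1> for the same reason as the packed ops: a single
// complex f16 operand is one 32-bit (real, imaginary) pair.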