//===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 AVX512 instruction set, defining the
// instructions, and properties of the instructions which are needed for code
// generation, machine code emission, and analysis.
//
//===----------------------------------------------------------------------===//

// Group template arguments that can be derived from the vector type (EltNum x
// EltVT). These are things like the register class for the writemask, etc.
// The idea is to pass one of these as the template argument rather than the
// individual arguments.
// The template is also used for scalar types, in which case numelts is 1.
class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
                      string suffix = ""> {
  RegisterClass RC = rc;
  ValueType EltVT = eltvt;
  int NumElts = numelts;

  // Corresponding mask register class.
  RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);

  // Corresponding mask register pair class.
  RegisterOperand KRPC = !if (!gt(NumElts, 16), ?,
                              !cast<RegisterOperand>("VK" # NumElts # "Pair"));

  // Corresponding write-mask register class.
  RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");

  // The mask VT.
  ValueType KVT = !cast<ValueType>("v" # NumElts # "i1");

  // Suffix used in the instruction mnemonic.
  string Suffix = suffix;

  // VTName is a string name for the vector VT. For vector types it will be
  // v # NumElts # EltVT, so for a vector of 8 elements of i32 it will be v8i32.
  // It is a little more complex for scalar types, where NumElts = 1.
  // In that case we build v4f32 or v2f64.
  string VTName = "v" # !if (!eq (NumElts, 1),
                        !if (!eq (EltVT.Size, 32), 4,
                        !if (!eq (EltVT.Size, 64), 2, NumElts)), NumElts) # EltVT;

  // The vector VT.
  ValueType VT = !cast<ValueType>(VTName);

  string EltTypeName = !cast<string>(EltVT);
  // Size of the element type in bits, e.g. 32 for v16i32.
  string EltSizeName = !subst("i", "", !subst("f", "", EltTypeName));
  int EltSize = EltVT.Size;

  // "i" for integer types and "f" for floating-point types
  string TypeVariantName = !subst(EltSizeName, "", EltTypeName);

  // Size of RC in bits, e.g. 512 for VR512.
  int Size = VT.Size;

  // The corresponding memory operand, e.g. i512mem for VR512.
  X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
  X86MemOperand ScalarMemOp = !cast<X86MemOperand>(EltVT # "mem");
  // FP scalar memory operand for intrinsics - ssmem/sdmem.
  Operand IntScalarMemOp = !if (!eq (EltTypeName, "f32"), !cast<Operand>("ssmem"),
                           !if (!eq (EltTypeName, "f64"), !cast<Operand>("sdmem"), ?));

  // Load patterns
  PatFrag LdFrag = !cast<PatFrag>("load" # VTName);

  PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" # VTName);

  PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);
  PatFrag BroadcastLdFrag = !cast<PatFrag>("X86VBroadcastld" # EltSizeName);

  ComplexPattern ScalarIntMemCPat = !if (!eq (EltTypeName, "f32"),
                                         !cast<ComplexPattern>("sse_load_f32"),
                                    !if (!eq (EltTypeName, "f64"),
                                         !cast<ComplexPattern>("sse_load_f64"),
                                    ?));

  // The string to specify embedded broadcast in assembly.
  string BroadcastStr = "{1to" # NumElts # "}";

  // 8-bit compressed displacement tuple/subvector format. This is only
  // defined for NumElts <= 8.
  CD8VForm CD8TupleForm = !if (!eq (!srl(NumElts, 4), 0),
                               !cast<CD8VForm>("CD8VT" # NumElts), ?);

  SubRegIndex SubRegIdx = !if (!eq (Size, 128), sub_xmm,
                          !if (!eq (Size, 256), sub_ymm, ?));

  Domain ExeDomain = !if (!eq (EltTypeName, "f32"), SSEPackedSingle,
                     !if (!eq (EltTypeName, "f64"), SSEPackedDouble,
                     SSEPackedInt));

  RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X, FR64X);

  dag ImmAllZerosV = (VT immAllZerosV);

  string ZSuffix = !if (!eq (Size, 128), "Z128",
                   !if (!eq (Size, 256), "Z256", "Z"));
}
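
// As a concrete illustration (all values derived from the class above), the
// v16i32_info instantiation below yields KRC = VK16, KVT = v16i1,
// VT = v16i32, MemOp = i512mem, BroadcastStr = "{1to16}" and ZSuffix = "Z".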
def v64i8_info  : X86VectorVTInfo<64,  i8, VR512, "b">;
def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">;
def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
def v8i64_info  : X86VectorVTInfo<8,  i64, VR512, "q">;
def v16f32_info : X86VectorVTInfo<16, f32, VR512, "ps">;
def v8f64_info  : X86VectorVTInfo<8,  f64, VR512, "pd">;

// "x" in v32i8x_info means RC = VR256X
def v32i8x_info  : X86VectorVTInfo<32,  i8, VR256X, "b">;
def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
def v8i32x_info  : X86VectorVTInfo<8,  i32, VR256X, "d">;
def v4i64x_info  : X86VectorVTInfo<4,  i64, VR256X, "q">;
def v8f32x_info  : X86VectorVTInfo<8,  f32, VR256X, "ps">;
def v4f64x_info  : X86VectorVTInfo<4,  f64, VR256X, "pd">;

def v16i8x_info  : X86VectorVTInfo<16,  i8, VR128X, "b">;
def v8i16x_info  : X86VectorVTInfo<8,  i16, VR128X, "w">;
def v4i32x_info  : X86VectorVTInfo<4,  i32, VR128X, "d">;
def v2i64x_info  : X86VectorVTInfo<2,  i64, VR128X, "q">;
def v4f32x_info  : X86VectorVTInfo<4,  f32, VR128X, "ps">;
def v2f64x_info  : X86VectorVTInfo<2,  f64, VR128X, "pd">;

// We map scalar types to the smallest (128-bit) vector type
// with the appropriate element type. This allows us to use the same
// masking logic.
def i32x_info : X86VectorVTInfo<1, i32, GR32, "si">;
def i64x_info : X86VectorVTInfo<1, i64, GR64, "sq">;
def f32x_info : X86VectorVTInfo<1, f32, VR128X, "ss">;
def f64x_info : X86VectorVTInfo<1, f64, VR128X, "sd">;

class AVX512VLVectorVTInfo<X86VectorVTInfo i512, X86VectorVTInfo i256,
                           X86VectorVTInfo i128> {
  X86VectorVTInfo info512 = i512;
  X86VectorVTInfo info256 = i256;
  X86VectorVTInfo info128 = i128;
}

def avx512vl_i8_info  : AVX512VLVectorVTInfo<v64i8_info, v32i8x_info,
                                             v16i8x_info>;
def avx512vl_i16_info : AVX512VLVectorVTInfo<v32i16_info, v16i16x_info,
                                             v8i16x_info>;
def avx512vl_i32_info : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info,
                                             v4i32x_info>;
def avx512vl_i64_info : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info,
                                             v2i64x_info>;
def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info,
                                             v4f32x_info>;
def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info,
                                             v2f64x_info>;

class X86KVectorVTInfo<RegisterClass _krc, RegisterClass _krcwm,
                       ValueType _vt> {
  RegisterClass KRC = _krc;
  RegisterClass KRCWM = _krcwm;
  ValueType KVT = _vt;
}

def v1i1_info  : X86KVectorVTInfo<VK1,  VK1WM,  v1i1>;
def v2i1_info  : X86KVectorVTInfo<VK2,  VK2WM,  v2i1>;
def v4i1_info  : X86KVectorVTInfo<VK4,  VK4WM,  v4i1>;
def v8i1_info  : X86KVectorVTInfo<VK8,  VK8WM,  v8i1>;
def v16i1_info : X86KVectorVTInfo<VK16, VK16WM, v16i1>;
def v32i1_info : X86KVectorVTInfo<VK32, VK32WM, v32i1>;
def v64i1_info : X86KVectorVTInfo<VK64, VK64WM, v64i1>;

// This multiclass generates the masking variants from the non-masking
// variant. It only provides the assembly pieces for the masking variants.
// It assumes custom ISel patterns for masking, which can be provided as
// template arguments.
multiclass AVX512_maskable_custom<bits<8> O, Format F,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern,
                                  list<dag> MaskingPattern,
                                  list<dag> ZeroMaskingPattern,
                                  string MaskingConstraint = "",
                                  bit IsCommutable = 0,
                                  bit IsKCommutable = 0,
                                  bit IsKZCommutable = IsCommutable> {
  let isCommutable = IsCommutable in
    def NAME: AVX512<O, F, Outs, Ins,
                     OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
                                   "$dst, "#IntelSrcAsm#"}",
                     Pattern>;

  // Prefer over VMOV*rrk Pat<>
  let isCommutable = IsKCommutable in
    def NAME#k: AVX512<O, F, Outs, MaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
                                     "$dst {${mask}}, "#IntelSrcAsm#"}",
                       MaskingPattern>,
                EVEX_K {
      // In case of the 3src subclass this is overridden with a let.
      string Constraints = MaskingConstraint;
    }

  // Zero-masking does not add any restrictions to the operand-commutation
  // transformation, so it is OK to use IsCommutable instead of IsKCommutable.
  let isCommutable = IsKZCommutable in // Prefer over VMOV*rrkz Pat<>
    def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
                        OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
                                      "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
                        ZeroMaskingPattern>,
                 EVEX_KZ;
}

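// For example (illustrative only, borrowing VADDPS-style AT&T assembly), the
// three defs above correspond to:
//   vaddps %zmm2, %zmm1, %zmm0             - NAME,    unmasked
//   vaddps %zmm2, %zmm1, %zmm0 {%k1}       - NAME#k,  merge-masking (EVEX_K)
//   vaddps %zmm2, %zmm1, %zmm0 {%k1} {z}   - NAME#kz, zero-masking  (EVEX_KZ)
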
// Common base class of AVX512_maskable and AVX512_maskable_3src.
multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS, dag MaskingRHS,
                                  SDNode Select = vselect,
                                  string MaskingConstraint = "",
                                  bit IsCommutable = 0,
                                  bit IsKCommutable = 0,
                                  bit IsKZCommutable = IsCommutable> :
  AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
                         AttSrcAsm, IntelSrcAsm,
                         [(set _.RC:$dst, RHS)],
                         [(set _.RC:$dst, MaskingRHS)],
                         [(set _.RC:$dst,
                               (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
                         MaskingConstraint, IsCommutable,
                         IsKCommutable, IsKZCommutable>;

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction. In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
// This version uses a separate dag for non-masking and masking.
multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
                                 dag Outs, dag Ins, string OpcodeStr,
                                 string AttSrcAsm, string IntelSrcAsm,
                                 dag RHS, dag MaskRHS,
                                 bit IsCommutable = 0, bit IsKCommutable = 0,
                                 SDNode Select = vselect> :
  AVX512_maskable_custom<O, F, Outs, Ins,
                         !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                         !con((ins _.KRCWM:$mask), Ins),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm,
                         [(set _.RC:$dst, RHS)],
                         [(set _.RC:$dst,
                               (Select _.KRCWM:$mask, MaskRHS, _.RC:$src0))],
                         [(set _.RC:$dst,
                               (Select _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))],
                         "$src0 = $dst", IsCommutable, IsKCommutable>;

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction. In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS,
                           bit IsCommutable = 0, bit IsKCommutable = 0,
                           bit IsKZCommutable = IsCommutable,
                           SDNode Select = vselect> :
  AVX512_maskable_common<O, F, _, Outs, Ins,
                         !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                         !con((ins _.KRCWM:$mask), Ins),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                         (Select _.KRCWM:$mask, RHS, _.RC:$src0),
                         Select, "$src0 = $dst", IsCommutable, IsKCommutable,
                         IsKZCommutable>;

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the scalar instruction.
multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs, dag Ins, string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS> :
  AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm,
                  RHS, 0, 0, 0, X86selects>;

// Similar to AVX512_maskable but in this case one of the source operands
// ($src1) is already tied to $dst, so we just use that for the preserved
// vector elements. NOTE that the NonTiedIns (the ins dag) should exclude
// $src1.
multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
                                dag Outs, dag NonTiedIns, string OpcodeStr,
                                string AttSrcAsm, string IntelSrcAsm,
                                dag RHS,
                                bit IsCommutable = 0,
                                bit IsKCommutable = 0,
                                SDNode Select = vselect,
                                bit MaskOnly = 0> :
  AVX512_maskable_common<O, F, _, Outs,
                         !con((ins _.RC:$src1), NonTiedIns),
                         !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                         !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm,
                         !if(MaskOnly, (null_frag), RHS),
                         (Select _.KRCWM:$mask, RHS, _.RC:$src1),
                         Select, "", IsCommutable, IsKCommutable>;

// Similar to AVX512_maskable_3src but in this case the input VT for the tied
// operand differs from the output VT. This requires a bitconvert on
// the preserved vector going into the vselect.
// NOTE: The unmasked pattern is disabled.
multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
                                     X86VectorVTInfo InVT,
                                     dag Outs, dag NonTiedIns, string OpcodeStr,
                                     string AttSrcAsm, string IntelSrcAsm,
                                     dag RHS, bit IsCommutable = 0> :
  AVX512_maskable_common<O, F, OutVT, Outs,
                         !con((ins InVT.RC:$src1), NonTiedIns),
                         !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
                         !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm, (null_frag),
                         (vselect InVT.KRCWM:$mask, RHS,
                                  (bitconvert InVT.RC:$src1)),
                         vselect, "", IsCommutable>;

multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                                       dag Outs, dag NonTiedIns, string OpcodeStr,
                                       string AttSrcAsm, string IntelSrcAsm,
                                       dag RHS,
                                       bit IsCommutable = 0,
                                       bit IsKCommutable = 0,
                                       bit MaskOnly = 0> :
  AVX512_maskable_3src<O, F, _, Outs, NonTiedIns, OpcodeStr, AttSrcAsm,
                       IntelSrcAsm, RHS, IsCommutable, IsKCommutable,
                       X86selects, MaskOnly>;

multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs, dag Ins,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern> :
  AVX512_maskable_custom<O, F, Outs, Ins,
                         !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                         !con((ins _.KRCWM:$mask), Ins),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
                         "$src0 = $dst">;

multiclass AVX512_maskable_3src_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
                                       dag Outs, dag NonTiedIns,
                                       string OpcodeStr,
                                       string AttSrcAsm, string IntelSrcAsm,
                                       list<dag> Pattern> :
  AVX512_maskable_custom<O, F, Outs,
                         !con((ins _.RC:$src1), NonTiedIns),
                         !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                         !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
                         "">;

// Instructions with a mask that put their result in a mask register, like
// "compare" and "vptest".
multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
                                      dag Outs,
                                      dag Ins, dag MaskingIns,
                                      string OpcodeStr,
                                      string AttSrcAsm, string IntelSrcAsm,
                                      list<dag> Pattern,
                                      list<dag> MaskingPattern,
                                      bit IsCommutable = 0> {
  let isCommutable = IsCommutable in {
    def NAME: AVX512<O, F, Outs, Ins,
                     OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
                                   "$dst, "#IntelSrcAsm#"}",
                     Pattern>;

    def NAME#k: AVX512<O, F, Outs, MaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
                                     "$dst {${mask}}, "#IntelSrcAsm#"}",
                       MaskingPattern>, EVEX_K;
  }
}

multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
                                      dag Outs,
                                      dag Ins, dag MaskingIns,
                                      string OpcodeStr,
                                      string AttSrcAsm, string IntelSrcAsm,
                                      dag RHS, dag MaskingRHS,
                                      bit IsCommutable = 0> :
  AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
                             AttSrcAsm, IntelSrcAsm,
                             [(set _.KRC:$dst, RHS)],
                             [(set _.KRC:$dst, MaskingRHS)], IsCommutable>;

multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
                               dag Outs, dag Ins, string OpcodeStr,
                               string AttSrcAsm, string IntelSrcAsm,
                               dag RHS, dag RHS_su, bit IsCommutable = 0> :
  AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
                             !con((ins _.KRCWM:$mask), Ins),
                             OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                             (and _.KRCWM:$mask, RHS_su), IsCommutable>;
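
// For example (illustrative only, VPCMPEQD-style assembly), the cmp variants
// correspond to:
//   vpcmpeqd %zmm1, %zmm0, %k0          - NAME,   full compare result
//   vpcmpeqd %zmm1, %zmm0, %k0 {%k2}    - NAME#k, result ANDed with $mask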

// Alias instruction that maps zero vector to pxor / xorp* for AVX-512.
// This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
// swizzled by ExecutionDomainFix to pxor.
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
                        [(set VR512:$dst, (v16i32 immAllZerosV))]>;
def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
                              [(set VR512:$dst, (v16i32 immAllOnesV))]>;
}

let Predicates = [HasAVX512] in {
def : Pat<(v64i8 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v32i16 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v8i64 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v16f32 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>;
}

// Alias instructions that allow VPTERNLOG to be used with a mask to create
// a mix of all-ones and all-zeros elements. This is done this way to force
// the same register to be used as input for all three sources.
let isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteVecALU] in {
def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst),
                                (ins VK16WM:$mask), "",
                                [(set VR512:$dst, (vselect (v16i1 VK16WM:$mask),
                                                           (v16i32 immAllOnesV),
                                                           (v16i32 immAllZerosV)))]>;
def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
                                (ins VK8WM:$mask), "",
                                [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
                                                           (v8i64 immAllOnesV),
                                                           (v8i64 immAllZerosV)))]>;
}
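
// A sketch of the expected post-RA expansion (assuming the usual pseudo
// lowering in X86InstrInfo): AVX512_512_SEXT_MASK_32 becomes roughly
//   vpternlogd $0xff, %zmm0, %zmm0, %zmm0 {%k1} {z}
// i.e. selected elements are set to all ones, masked-off elements are zeroed,
// and a single zmm register serves as all three sources.
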
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
                        [(set VR128X:$dst, (v4i32 immAllZerosV))]>;
def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
                        [(set VR256X:$dst, (v8i32 immAllZerosV))]>;
}

let Predicates = [HasAVX512] in {
def : Pat<(v8i16 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v16i8 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v2i64 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v4f32 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v2f64 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v32i8 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v16i16 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v4i64 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v8f32 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v4f64 immAllZerosV), (AVX512_256_SET0)>;
}

// Alias instructions that map fld0 to xorps for SSE or vxorps for AVX.
// These are expanded by ExpandPostRAPseudos.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in {
  def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
                          [(set FR32X:$dst, fp32imm0)]>;
  def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
                          [(set FR64X:$dst, fp64imm0)]>;
  def AVX512_FsFLD0F128 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
                            [(set VR128X:$dst, fp128imm0)]>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - VECTOR INSERT
//

// Supports two different pattern operators for mask and unmasked ops. Allows
// null_frag to be passed for one.
multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From,
                                  X86VectorVTInfo To,
                                  SDPatternOperator vinsert_insert,
                                  SDPatternOperator vinsert_for_mask,
                                  X86FoldableSchedWrite sched> {
  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    defm rr : AVX512_maskable_split<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
                   (ins To.RC:$src1, From.RC:$src2, u8imm:$src3),
                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
                   "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
                                         (From.VT From.RC:$src2),
                                         (iPTR imm)),
                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
                                           (From.VT From.RC:$src2),
                                           (iPTR imm))>,
                   AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
    let mayLoad = 1 in
    defm rm : AVX512_maskable_split<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
                   (ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3),
                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
                   "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
                                         (From.VT (From.LdFrag addr:$src2)),
                                         (iPTR imm)),
                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
                                           (From.VT (From.LdFrag addr:$src2)),
                                           (iPTR imm))>, AVX512AIi8Base, EVEX_4V,
                   EVEX_CD8<From.EltSize, From.CD8TupleForm>,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
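
// With NAME = VINSERTF32x4Z, for example, the defms above expand to
// VINSERTF32x4Zrr/rrk/rrkz and VINSERTF32x4Zrm/rmk/rmkz; the lowering
// patterns below reach them via !cast<Instruction>(InstrStr#"rr"/"rm").
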
// Passes the same pattern operator for masked and unmasked ops.
multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From,
                            X86VectorVTInfo To,
                            SDPatternOperator vinsert_insert,
                            X86FoldableSchedWrite sched> :
  vinsert_for_size_split<Opcode, From, To, vinsert_insert, vinsert_insert, sched>;

multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                                     X86VectorVTInfo To, PatFrag vinsert_insert,
                                     SDNodeXForm INSERT_get_vinsert_imm, list<Predicate> p> {
  let Predicates = p in {
    def : Pat<(vinsert_insert:$ins
                   (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rr")
                      To.RC:$src1, From.RC:$src2,
                      (INSERT_get_vinsert_imm To.RC:$ins)))>;

    def : Pat<(vinsert_insert:$ins
                   (To.VT To.RC:$src1),
                   (From.VT (From.LdFrag addr:$src2)),
                   (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rm")
                      To.RC:$src1, addr:$src2,
                      (INSERT_get_vinsert_imm To.RC:$ins)))>;
  }
}

multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
                            ValueType EltVT64, int Opcode256,
                            X86FoldableSchedWrite sched> {

  let Predicates = [HasVLX] in
    defm NAME # "32x4Z256" : vinsert_for_size<Opcode128,
                                  X86VectorVTInfo< 4, EltVT32, VR128X>,
                                  X86VectorVTInfo< 8, EltVT32, VR256X>,
                                  vinsert128_insert, sched>, EVEX_V256;

  defm NAME # "32x4Z" : vinsert_for_size<Opcode128,
                                X86VectorVTInfo< 4, EltVT32, VR128X>,
                                X86VectorVTInfo<16, EltVT32, VR512>,
                                vinsert128_insert, sched>, EVEX_V512;

  defm NAME # "64x4Z" : vinsert_for_size<Opcode256,
                                X86VectorVTInfo< 4, EltVT64, VR256X>,
                                X86VectorVTInfo< 8, EltVT64, VR512>,
                                vinsert256_insert, sched>, VEX_W, EVEX_V512;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasVLX, HasDQI] in
    defm NAME # "64x2Z256" : vinsert_for_size_split<Opcode128,
                                  X86VectorVTInfo< 2, EltVT64, VR128X>,
                                  X86VectorVTInfo< 4, EltVT64, VR256X>,
                                  null_frag, vinsert128_insert, sched>,
                                  VEX_W1X, EVEX_V256;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z" : vinsert_for_size_split<Opcode128,
                                X86VectorVTInfo< 2, EltVT64, VR128X>,
                                X86VectorVTInfo< 8, EltVT64, VR512>,
                                null_frag, vinsert128_insert, sched>,
                                VEX_W, EVEX_V512;

    defm NAME # "32x8Z" : vinsert_for_size_split<Opcode256,
                                X86VectorVTInfo< 8, EltVT32, VR256X>,
                                X86VectorVTInfo<16, EltVT32, VR512>,
                                null_frag, vinsert256_insert, sched>,
                                EVEX_V512;
  }
}

// FIXME: Is there a better scheduler class for VINSERTF/VINSERTI?
defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a, WriteFShuffle256>;
defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a, WriteShuffle256>;

// Codegen patterns with the alternative types.
// Even with AVX512DQ we'll still use these for unmasked operations.
defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;

defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;

defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;

// Codegen patterns with the alternative types: insert VEC128 into VEC256
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
// Codegen patterns with the alternative types: insert VEC128 into VEC512
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
// Codegen patterns with the alternative types: insert VEC256 into VEC512
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;


multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                 X86VectorVTInfo To, X86VectorVTInfo Cast,
                                 PatFrag vinsert_insert,
                                 SDNodeXForm INSERT_get_vinsert_imm,
                                 list<Predicate> p> {
let Predicates = p in {
  def : Pat<(Cast.VT
             (vselect Cast.KRCWM:$mask,
                      (bitconvert
                       (vinsert_insert:$ins (To.VT To.RC:$src1),
                                            (From.VT From.RC:$src2),
                                            (iPTR imm))),
                      Cast.RC:$src0)),
            (!cast<Instruction>(InstrStr#"rrk")
             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
  def : Pat<(Cast.VT
             (vselect Cast.KRCWM:$mask,
                      (bitconvert
                       (vinsert_insert:$ins (To.VT To.RC:$src1),
                                            (From.VT
                                             (bitconvert
                                              (From.LdFrag addr:$src2))),
                                            (iPTR imm))),
                      Cast.RC:$src0)),
            (!cast<Instruction>(InstrStr#"rmk")
             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;

  def : Pat<(Cast.VT
             (vselect Cast.KRCWM:$mask,
                      (bitconvert
                       (vinsert_insert:$ins (To.VT To.RC:$src1),
                                            (From.VT From.RC:$src2),
                                            (iPTR imm))),
                      Cast.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#"rrkz")
             Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
  def : Pat<(Cast.VT
             (vselect Cast.KRCWM:$mask,
                      (bitconvert
                       (vinsert_insert:$ins (To.VT To.RC:$src1),
                                            (From.VT (From.LdFrag addr:$src2)),
                                            (iPTR imm))),
                      Cast.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#"rmkz")
             Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
}
}

defm : vinsert_for_mask_cast<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
                             v8f32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4f32x_info, v8f32x_info,
                             v4f64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;

defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4i32x_info, v8i32x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v8i16x_info, v16i16x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v16i8x_info, v32i8x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;

defm : vinsert_for_mask_cast<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
                             v16f32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z", v4f32x_info, v16f32_info,
                             v8f64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;

defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v4i32x_info, v16i32_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v8i16x_info, v32i16_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v16i8x_info, v64i8_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;

defm : vinsert_for_mask_cast<"VINSERTF32x8Z", v4f64x_info, v8f64_info,
                             v16f32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
vinsert_for_mask_cast<"VINSERTF64x4Z", v8f32x_info, v16f32_info, 729 v8f64_info, vinsert256_insert, 730 INSERT_get_vinsert256_imm, [HasAVX512]>; 731 732defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v4i64x_info, v8i64_info, 733 v16i32_info, vinsert256_insert, 734 INSERT_get_vinsert256_imm, [HasDQI]>; 735defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v16i16x_info, v32i16_info, 736 v16i32_info, vinsert256_insert, 737 INSERT_get_vinsert256_imm, [HasDQI]>; 738defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v32i8x_info, v64i8_info, 739 v16i32_info, vinsert256_insert, 740 INSERT_get_vinsert256_imm, [HasDQI]>; 741defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v8i32x_info, v16i32_info, 742 v8i64_info, vinsert256_insert, 743 INSERT_get_vinsert256_imm, [HasAVX512]>; 744defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v16i16x_info, v32i16_info, 745 v8i64_info, vinsert256_insert, 746 INSERT_get_vinsert256_imm, [HasAVX512]>; 747defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v32i8x_info, v64i8_info, 748 v8i64_info, vinsert256_insert, 749 INSERT_get_vinsert256_imm, [HasAVX512]>; 750 751// vinsertps - insert f32 to XMM 752let ExeDomain = SSEPackedSingle in { 753let isCommutable = 1 in 754def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst), 755 (ins VR128X:$src1, VR128X:$src2, u8imm:$src3), 756 "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 757 [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, timm:$src3))]>, 758 EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>; 759def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst), 760 (ins VR128X:$src1, f32mem:$src2, u8imm:$src3), 761 "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 762 [(set VR128X:$dst, (X86insertps VR128X:$src1, 763 (v4f32 (scalar_to_vector (loadf32 addr:$src2))), 764 timm:$src3))]>, 765 EVEX_4V, EVEX_CD8<32, CD8VT1>, 766 Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>; 767} 768 769//===----------------------------------------------------------------------===// 770// AVX-512 VECTOR EXTRACT 771//--- 772 773// Supports two different pattern operators for mask and unmasked ops. Allows 774// null_frag to be passed for one. 
//===----------------------------------------------------------------------===//
// AVX-512 VECTOR EXTRACT
//---

// Supports two different pattern operators for mask and unmasked ops. Allows
// null_frag to be passed for one.
multiclass vextract_for_size_split<int Opcode,
                                   X86VectorVTInfo From, X86VectorVTInfo To,
                                   SDPatternOperator vextract_extract,
                                   SDPatternOperator vextract_for_mask,
                                   SchedWrite SchedRR, SchedWrite SchedMR> {

  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    defm rr : AVX512_maskable_split<Opcode, MRMDestReg, To, (outs To.RC:$dst),
                (ins From.RC:$src1, u8imm:$idx),
                "vextract" # To.EltTypeName # "x" # To.NumElts,
                "$idx, $src1", "$src1, $idx",
                (vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm)),
                (vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm))>,
                AVX512AIi8Base, EVEX, Sched<[SchedRR]>;

    def mr : AVX512AIi8<Opcode, MRMDestMem, (outs),
                (ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx),
                "vextract" # To.EltTypeName # "x" # To.NumElts #
                    "\t{$idx, $src1, $dst|$dst, $src1, $idx}",
                [(store (To.VT (vextract_extract:$idx
                                (From.VT From.RC:$src1), (iPTR imm))),
                        addr:$dst)]>, EVEX,
                Sched<[SchedMR]>;

    let mayStore = 1, hasSideEffects = 0 in
    def mrk : AVX512AIi8<Opcode, MRMDestMem, (outs),
                (ins To.MemOp:$dst, To.KRCWM:$mask,
                     From.RC:$src1, u8imm:$idx),
                "vextract" # To.EltTypeName # "x" # To.NumElts #
                    "\t{$idx, $src1, $dst {${mask}}|"
                    "$dst {${mask}}, $src1, $idx}", []>,
                EVEX_K, EVEX, Sched<[SchedMR]>, NotMemoryFoldable;
  }
}

// Passes the same pattern operator for masked and unmasked ops.
multiclass vextract_for_size<int Opcode, X86VectorVTInfo From,
                             X86VectorVTInfo To,
                             SDPatternOperator vextract_extract,
                             SchedWrite SchedRR, SchedWrite SchedMR> :
  vextract_for_size_split<Opcode, From, To, vextract_extract, vextract_extract, SchedRR, SchedMR>;

// Codegen patterns for the alternative types
multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                                      X86VectorVTInfo To, PatFrag vextract_extract,
                                      SDNodeXForm EXTRACT_get_vextract_imm, list<Predicate> p> {
  let Predicates = p in {
    def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rr")
                      From.RC:$src1,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;
    def : Pat<(store (To.VT (vextract_extract:$ext (From.VT From.RC:$src1),
                             (iPTR imm))), addr:$dst),
              (!cast<Instruction>(InstrStr#"mr") addr:$dst, From.RC:$src1,
               (EXTRACT_get_vextract_imm To.RC:$ext))>;
  }
}

multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
                             ValueType EltVT64, int Opcode256,
                             SchedWrite SchedRR, SchedWrite SchedMR> {
  let Predicates = [HasAVX512] in {
    defm NAME # "32x4Z" : vextract_for_size<Opcode128,
                               X86VectorVTInfo<16, EltVT32, VR512>,
                               X86VectorVTInfo< 4, EltVT32, VR128X>,
                               vextract128_extract, SchedRR, SchedMR>,
                               EVEX_V512, EVEX_CD8<32, CD8VT4>;
    defm NAME # "64x4Z" : vextract_for_size<Opcode256,
                               X86VectorVTInfo< 8, EltVT64, VR512>,
                               X86VectorVTInfo< 4, EltVT64, VR256X>,
                               vextract256_extract, SchedRR, SchedMR>,
                               VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
  }
  let Predicates = [HasVLX] in
    defm NAME # "32x4Z256" : vextract_for_size<Opcode128,
                               X86VectorVTInfo< 8, EltVT32, VR256X>,
                               X86VectorVTInfo< 4, EltVT32, VR128X>,
                               vextract128_extract, SchedRR, SchedMR>,
                               EVEX_V256, EVEX_CD8<32, CD8VT4>;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasVLX, HasDQI] in
    defm NAME # "64x2Z256" : vextract_for_size_split<Opcode128,
                               X86VectorVTInfo< 4, EltVT64, VR256X>,
                               X86VectorVTInfo< 2, EltVT64, VR128X>,
                               null_frag, vextract128_extract, SchedRR, SchedMR>,
                               VEX_W1X, EVEX_V256, EVEX_CD8<64, CD8VT2>;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z" : vextract_for_size_split<Opcode128,
                               X86VectorVTInfo< 8, EltVT64, VR512>,
                               X86VectorVTInfo< 2, EltVT64, VR128X>,
                               null_frag, vextract128_extract, SchedRR, SchedMR>,
                               VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
    defm NAME # "32x8Z" : vextract_for_size_split<Opcode256,
                               X86VectorVTInfo<16, EltVT32, VR512>,
                               X86VectorVTInfo< 8, EltVT32, VR256X>,
                               null_frag, vextract256_extract, SchedRR, SchedMR>,
                               EVEX_V512, EVEX_CD8<32, CD8VT8>;
  }
}

// TODO - replace WriteFStore/WriteVecStore with X86SchedWriteMoveLSWidths types.
defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b, WriteFShuffle256, WriteFStore>;
defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b, WriteShuffle256, WriteVecStore>;

// extract_subvector codegen patterns with the alternative types.
// Even with AVX512DQ we'll still use these for unmasked operations.
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;

defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;

defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;

// Codegen patterns with the alternative types: extract VEC128 from VEC256
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;

// Codegen patterns with the alternative types: extract VEC128 from VEC512
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
// Codegen patterns with the alternative types: extract VEC256 from VEC512
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;


// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
let Predicates = [NoVLX] in {
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
          (v2i64 (VEXTRACTI128rr
                  (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
          (v2f64 (VEXTRACTF128rr
                  (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
          (v4i32 (VEXTRACTI128rr
                  (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
          (v4f32 (VEXTRACTF128rr
                  (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
          (v8i16 (VEXTRACTI128rr
                  (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
          (v16i8 (VEXTRACTI128rr
                  (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
}

// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
let Predicates = [HasVLX] in {
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
          (v2i64 (VEXTRACTI32x4Z256rr
                  (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
          (v2f64 (VEXTRACTF32x4Z256rr
                  (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
          (v4i32 (VEXTRACTI32x4Z256rr
                  (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
          (v4f32 (VEXTRACTF32x4Z256rr
                  (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
          (v8i16 (VEXTRACTI32x4Z256rr
                  (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
          (v16i8 (VEXTRACTI32x4Z256rr
                  (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
}
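
// For example, the first pattern above turns an extract of elements [3:2] of
// a v8i64 into a 128-bit extract (immediate 1) from the low ymm subregister,
// so the result can be encoded as VEX vextracti128 (directly, or via the
// EVEX-to-VEX compression pass for the Z256 form) instead of a 512-bit
// EVEX-only extract.
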
// Additional patterns for handling a bitcast between the vselect and the
// extract_subvector.
multiclass vextract_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                  X86VectorVTInfo To, X86VectorVTInfo Cast,
                                  PatFrag vextract_extract,
                                  SDNodeXForm EXTRACT_get_vextract_imm,
                                  list<Predicate> p> {
let Predicates = p in {
  def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
                              (bitconvert
                               (To.VT (vextract_extract:$ext
                                       (From.VT From.RC:$src), (iPTR imm)))),
                              To.RC:$src0)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
                      Cast.RC:$src0, Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;

  def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
                              (bitconvert
                               (To.VT (vextract_extract:$ext
                                       (From.VT From.RC:$src), (iPTR imm)))),
                              Cast.ImmAllZerosV)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
                      Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;
}
}

defm : vextract_for_mask_cast<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
                              v4f32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z256", v8f32x_info, v4f32x_info,
                              v2f64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;

defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v8i32x_info, v4i32x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v16i16x_info, v8i16x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v32i8x_info, v16i8x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;

defm : vextract_for_mask_cast<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
                              v4f32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z", v16f32_info, v4f32x_info,
                              v2f64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;

defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v16i32_info, v4i32x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v32i16_info, v8i16x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v64i8_info, v16i8x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;

defm : vextract_for_mask_cast<"VEXTRACTF32x8Z", v8f64_info, v4f64x_info,
                              v8f32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
                              v4f64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;

defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v8i64_info, v4i64x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v32i16_info, v16i16x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v64i8_info, v32i8x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;

// vextractps - extract 32 bits from XMM
def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
                      (ins VR128X:$src1, u8imm:$src2),
                      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
                      EVEX, VEX_WIG, Sched<[WriteVecExtract]>;

def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
                      (ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
                      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
                              addr:$dst)]>,
                      EVEX, VEX_WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecExtractSt]>;
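
// The immediate's low two bits pick the 32-bit element; e.g. (illustrative)
// "vextractps $3, %xmm1, %eax" copies element 3 to a GPR, and the mr form
// stores the selected element straight to memory without a GPR round trip.
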
//===---------------------------------------------------------------------===//
// AVX-512 BROADCAST
//---
// Broadcast with a scalar argument.
multiclass avx512_broadcast_scalar<bits<8> opc, string OpcodeStr,
                                   string Name,
                                   X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo> {
  def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#r)
             (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
  def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
                                  (X86VBroadcast SrcInfo.FRC:$src),
                                  DestInfo.RC:$src0)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#rk)
             DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
             (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
  def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
                                  (X86VBroadcast SrcInfo.FRC:$src),
                                  DestInfo.ImmAllZerosV)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#rkz)
             DestInfo.KRCWM:$mask, (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
}

// Split version to allow mask and broadcast node to be different types. This
// helps support the 32x2 broadcasts.
multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
                                     string Name,
                                     SchedWrite SchedRR, SchedWrite SchedRM,
                                     X86VectorVTInfo MaskInfo,
                                     X86VectorVTInfo DestInfo,
                                     X86VectorVTInfo SrcInfo,
                                     bit IsConvertibleToThreeAddress,
                                     SDPatternOperator UnmaskedOp = X86VBroadcast,
                                     SDPatternOperator UnmaskedBcastOp = SrcInfo.BroadcastLdFrag> {
  let hasSideEffects = 0 in
  def r : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst), (ins SrcInfo.RC:$src),
                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                   [(set MaskInfo.RC:$dst,
                     (MaskInfo.VT
                      (bitconvert
                       (DestInfo.VT
                        (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))))],
                   DestInfo.ExeDomain>, T8PD, EVEX, Sched<[SchedRR]>;
  def rkz : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
                     (ins MaskInfo.KRCWM:$mask, SrcInfo.RC:$src),
                     !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
                                "${dst} {${mask}} {z}, $src}"),
                     [(set MaskInfo.RC:$dst,
                       (vselect MaskInfo.KRCWM:$mask,
                                (MaskInfo.VT
                                 (bitconvert
                                  (DestInfo.VT
                                   (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
                                MaskInfo.ImmAllZerosV))],
                     DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ, Sched<[SchedRR]>;
  let Constraints = "$src0 = $dst" in
  def rk : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
                    (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
                         SrcInfo.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
                               "${dst} {${mask}}, $src}"),
                    [(set MaskInfo.RC:$dst,
                      (vselect MaskInfo.KRCWM:$mask,
                               (MaskInfo.VT
                                (bitconvert
                                 (DestInfo.VT
                                  (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
                               MaskInfo.RC:$src0))],
                    DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K, Sched<[SchedRR]>;

  let hasSideEffects = 0, mayLoad = 1 in
  def m : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
                   (ins SrcInfo.ScalarMemOp:$src),
                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                   [(set MaskInfo.RC:$dst,
                     (MaskInfo.VT
                      (bitconvert
                       (DestInfo.VT
                        (UnmaskedBcastOp addr:$src)))))],
                   DestInfo.ExeDomain>, T8PD, EVEX,
                   EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;

  def mkz : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
                     (ins MaskInfo.KRCWM:$mask, SrcInfo.ScalarMemOp:$src),
                     !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
                                "${dst} {${mask}} {z}, $src}"),
                     [(set MaskInfo.RC:$dst,
                       (vselect MaskInfo.KRCWM:$mask,
                                (MaskInfo.VT
                                 (bitconvert
                                  (DestInfo.VT
                                   (SrcInfo.BroadcastLdFrag addr:$src)))),
                                MaskInfo.ImmAllZerosV))],
                     DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ,
                     EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;

  let Constraints = "$src0 = $dst",
      isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
  def mk : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
                    (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
                         SrcInfo.ScalarMemOp:$src),
                    !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
                               "${dst} {${mask}}, $src}"),
                    [(set MaskInfo.RC:$dst,
                      (vselect MaskInfo.KRCWM:$mask,
                               (MaskInfo.VT
                                (bitconvert
                                 (DestInfo.VT
                                  (SrcInfo.BroadcastLdFrag addr:$src)))),
                               MaskInfo.RC:$src0))],
                    DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K,
                    EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
}

// Helper class to force mask and broadcast result to the same type.
multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr, string Name,
                               SchedWrite SchedRR, SchedWrite SchedRM,
                               X86VectorVTInfo DestInfo,
                               X86VectorVTInfo SrcInfo,
                               bit IsConvertibleToThreeAddress> :
  avx512_broadcast_rm_split<opc, OpcodeStr, Name, SchedRR, SchedRM,
                            DestInfo, DestInfo, SrcInfo,
                            IsConvertibleToThreeAddress>;

multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                 WriteFShuffle256Ld, _.info512, _.info128, 1>,
             avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512,
                                     _.info128>,
             EVEX_V512;
  }

  let Predicates = [HasVLX] in {
    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                    WriteFShuffle256Ld, _.info256, _.info128, 1>,
                avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256,
                                        _.info128>,
                EVEX_V256;
  }
}

multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                 WriteFShuffle256Ld, _.info512, _.info128, 1>,
             avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512,
                                     _.info128>,
             EVEX_V512;
  }

  let Predicates = [HasVLX] in {
    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                    WriteFShuffle256Ld, _.info256, _.info128, 1>,
                avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256,
                                        _.info128>,
                EVEX_V256;
    defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                    WriteFShuffle256Ld, _.info128, _.info128, 1>,
                avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info128,
                                        _.info128>,
                EVEX_V128;
  }
}

defm VBROADCASTSS : avx512_fp_broadcast_ss<0x18, "vbroadcastss",
                                           avx512vl_f32_info>;
defm VBROADCASTSD : avx512_fp_broadcast_sd<0x19, "vbroadcastsd",
                                           avx512vl_f64_info>, VEX_W1X;

multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR,
                                    X86VectorVTInfo _, SDPatternOperator OpNode,
                                    RegisterClass SrcRC> {
  let ExeDomain = _.ExeDomain in
  defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins SrcRC:$src),
                           "vpbroadcast"##_.Suffix, "$src", "$src",
                           (_.VT (OpNode SrcRC:$src))>, T8PD, EVEX,
                           Sched<[SchedRR]>;
}

multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite SchedRR,
                                      X86VectorVTInfo _, SDPatternOperator OpNode,
                                      RegisterClass SrcRC, SubRegIndex Subreg> {
  let hasSideEffects = 0, ExeDomain = _.ExeDomain in
  defm r : AVX512_maskable_custom<opc, MRMSrcReg,
                                  (outs _.RC:$dst), (ins GR32:$src),
                                  !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
                                  !con((ins _.KRCWM:$mask), (ins GR32:$src)),
                                  "vpbroadcast"##_.Suffix, "$src", "$src", [], [], [],
                                  "$src0 = $dst">, T8PD, EVEX, Sched<[SchedRR]>;

  def : Pat <(_.VT (OpNode SrcRC:$src)),
             (!cast<Instruction>(Name#r)
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;

  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
             (!cast<Instruction>(Name#rk) _.RC:$src0, _.KRCWM:$mask,
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;

  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV),
             (!cast<Instruction>(Name#rkz) _.KRCWM:$mask,
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
}

multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name,
                                         AVX512VLVectorVTInfo _, SDPatternOperator OpNode,
                                         RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, WriteShuffle256, _.info512,
                                        OpNode, SrcRC, Subreg>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, WriteShuffle256,
                                           _.info256, OpNode, SrcRC, Subreg>, EVEX_V256;
    defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, WriteShuffle,
                                           _.info128, OpNode, SrcRC, Subreg>, EVEX_V128;
  }
}

multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
                                       SDPatternOperator OpNode,
                                       RegisterClass SrcRC, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info512, OpNode,
                                      SrcRC>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info256, OpNode,
                                         SrcRC>, EVEX_V256;
    defm Z128 : avx512_int_broadcast_reg<opc, WriteShuffle, _.info128, OpNode,
                                         SrcRC>, EVEX_V128;
  }
}

defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr",
                       avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>;
defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr",
                       avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit,
                       HasBWI>;
defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
                                                 X86VBroadcast, GR32, HasAVX512>;
defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
                                                 X86VBroadcast, GR64, HasAVX512>, VEX_W;

multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
                                      AVX512VLVectorVTInfo _, Predicate prd,
                                      bit IsConvertibleToThreeAddress> {
  let Predicates = [prd] in {
    defm Z : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256,
                                 WriteShuffle256Ld, _.info512, _.info128,
                                 IsConvertibleToThreeAddress>,
                                 EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256,
                                    WriteShuffle256Ld, _.info256, _.info128,
                                    IsConvertibleToThreeAddress>,
                                    EVEX_V256;
    defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle,
                                    WriteShuffleXLd, _.info128, _.info128,
                                    IsConvertibleToThreeAddress>,
                                    EVEX_V128;
  }
}

defm VPBROADCASTB : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
                                               avx512vl_i8_info, HasBWI, 0>;
defm VPBROADCASTW : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
                                               avx512vl_i16_info, HasBWI, 0>;
defm VPBROADCASTD : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
                                               avx512vl_i32_info, HasAVX512, 1>;
defm VPBROADCASTQ : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
                                               avx512vl_i64_info, HasAVX512, 1>, VEX_W1X;

multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
                                      X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
                            (_Dst.VT (X86SubVBroadcast
                                      (_Src.VT (_Src.LdFrag addr:$src))))>,
                            Sched<[SchedWriteShuffle.YMM.Folded]>,
                            AVX5128IBase, EVEX;
}
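
// For example, the VBROADCASTI32X4 defm below instantiates this multiclass as
// VBROADCASTI32X4rm/rmk/rmkz: a 128-bit memory subvector repeated across the
// whole destination register.
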
// This should be used for the AVX512DQ broadcast instructions. It disables
// the unmasked patterns so that we only use the DQ instructions when masking
// is requested.
multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
                                         X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
  let hasSideEffects = 0, mayLoad = 1 in
  defm rm : AVX512_maskable_split<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                                  (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
                                  (null_frag),
                                  (_Dst.VT (X86SubVBroadcast
                                            (_Src.VT (_Src.LdFrag addr:$src))))>,
                                  Sched<[SchedWriteShuffle.YMM.Folded]>,
                                  AVX5128IBase, EVEX;
}

let Predicates = [HasAVX512] in {
  // 32-bit targets will fail to load an i64 directly but can use ZEXT_LOAD.
  def : Pat<(v8i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
            (VPBROADCASTQZm addr:$src)>;

  // FIXME: this is to handle aligned extloads from i8.
  def : Pat<(v16i32 (X86VBroadcast (loadi32 addr:$src))),
            (VPBROADCASTDZm addr:$src)>;
}

let Predicates = [HasVLX] in {
  // 32-bit targets will fail to load an i64 directly but can use ZEXT_LOAD.
  def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
            (VPBROADCASTQZ128m addr:$src)>;
  def : Pat<(v4i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
            (VPBROADCASTQZ256m addr:$src)>;

  // FIXME: this is to handle aligned extloads from i8.
  def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
            (VPBROADCASTDZ128m addr:$src)>;
  def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))),
            (VPBROADCASTDZ256m addr:$src)>;
}
let Predicates = [HasVLX, HasBWI] in {
  // loadi16 is tricky to fold, because !isTypeDesirableForOp rejects i16
  // ops, justifiably. This means we'll encounter truncated i32 loads; match
  // that here.
  def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
            (VPBROADCASTWZ128m addr:$src)>;
  def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
            (VPBROADCASTWZ256m addr:$src)>;
  def : Pat<(v8i16 (X86VBroadcast
                    (i16 (trunc (i32 (extloadi16 addr:$src)))))),
            (VPBROADCASTWZ128m addr:$src)>;
  def : Pat<(v8i16 (X86VBroadcast
                    (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
            (VPBROADCASTWZ128m addr:$src)>;
  def : Pat<(v16i16 (X86VBroadcast
                     (i16 (trunc (i32 (extloadi16 addr:$src)))))),
            (VPBROADCASTWZ256m addr:$src)>;
  def : Pat<(v16i16 (X86VBroadcast
                     (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
            (VPBROADCASTWZ256m addr:$src)>;

  // FIXME: this is to handle aligned extloads from i8.
  def : Pat<(v8i16 (X86VBroadcast (loadi16 addr:$src))),
            (VPBROADCASTWZ128m addr:$src)>;
  def : Pat<(v16i16 (X86VBroadcast (loadi16 addr:$src))),
            (VPBROADCASTWZ256m addr:$src)>;
}
let Predicates = [HasBWI] in {
  // loadi16 is tricky to fold, because !isTypeDesirableForOp rejects i16
  // ops, justifiably. This means we'll encounter truncated i32 loads; match
  // that here.
  def : Pat<(v32i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
            (VPBROADCASTWZm addr:$src)>;
  def : Pat<(v32i16 (X86VBroadcast
                     (i16 (trunc (i32 (extloadi16 addr:$src)))))),
            (VPBROADCASTWZm addr:$src)>;
  def : Pat<(v32i16 (X86VBroadcast
                     (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
            (VPBROADCASTWZm addr:$src)>;
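  // For example, IR that loads an i16 and broadcasts it is typically
  // legalized to a wider (i32 (load)) followed by a truncate before the
  // X86VBroadcast node is formed, which is why the patterns above match
  // through trunc rather than a plain loadi16.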
  // FIXME: this is to handle aligned extloads from i8.
  def : Pat<(v32i16 (X86VBroadcast (loadi16 addr:$src))),
            (VPBROADCASTWZm addr:$src)>;
}

//===----------------------------------------------------------------------===//
// AVX-512 BROADCAST SUBVECTORS
//

defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
                       v16i32_info, v4i32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
                       v16f32_info, v4f32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
                       v8i64_info, v4i64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT4>;
defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
                       v8f64_info, v4f64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT4>;

let Predicates = [HasAVX512] in {
def : Pat<(v16f32 (X86SubVBroadcast (loadv8f32 addr:$src))),
          (VBROADCASTF64X4rm addr:$src)>;
def : Pat<(v16i32 (X86SubVBroadcast (loadv8i32 addr:$src))),
          (VBROADCASTI64X4rm addr:$src)>;
def : Pat<(v32i16 (X86SubVBroadcast (loadv16i16 addr:$src))),
          (VBROADCASTI64X4rm addr:$src)>;
def : Pat<(v64i8 (X86SubVBroadcast (loadv32i8 addr:$src))),
          (VBROADCASTI64X4rm addr:$src)>;

// Provide a fallback in case the load node used in the patterns above has
// additional users, which prevents the patterns from being selected.
def : Pat<(v8f64 (X86SubVBroadcast (v4f64 VR256X:$src))),
          (VINSERTF64x4Zrr (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
           (v4f64 VR256X:$src), 1)>;
def : Pat<(v16f32 (X86SubVBroadcast (v8f32 VR256X:$src))),
          (VINSERTF64x4Zrr (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
           (v8f32 VR256X:$src), 1)>;
def : Pat<(v8i64 (X86SubVBroadcast (v4i64 VR256X:$src))),
          (VINSERTI64x4Zrr (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
           (v4i64 VR256X:$src), 1)>;
def : Pat<(v16i32 (X86SubVBroadcast (v8i32 VR256X:$src))),
          (VINSERTI64x4Zrr (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
           (v8i32 VR256X:$src), 1)>;
def : Pat<(v32i16 (X86SubVBroadcast (v16i16 VR256X:$src))),
          (VINSERTI64x4Zrr (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
           (v16i16 VR256X:$src), 1)>;
def : Pat<(v64i8 (X86SubVBroadcast (v32i8 VR256X:$src))),
          (VINSERTI64x4Zrr (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
           (v32i8 VR256X:$src), 1)>;

def : Pat<(v8f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
          (VBROADCASTF32X4rm addr:$src)>;
def : Pat<(v8i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
          (VBROADCASTI32X4rm addr:$src)>;
def : Pat<(v32i16 (X86SubVBroadcast (loadv8i16 addr:$src))),
          (VBROADCASTI32X4rm addr:$src)>;
def : Pat<(v64i8 (X86SubVBroadcast (loadv16i8 addr:$src))),
          (VBROADCASTI32X4rm addr:$src)>;
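// The element type is irrelevant for an unmasked subvector broadcast: a
// 128-bit load repeated four times gives the same bits for v2f64 and v4f32,
// so the AVX512F 32x4/64x4 forms above cover every element type. Only the
// masked forms below need the instruction's element size to match the mask
// granularity.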
// Patterns for selects of bitcasted operations.
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
                   (v16f32 immAllZerosV)),
          (VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTF32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
                   (v16i32 immAllZerosV)),
          (VBROADCASTI32X4rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTI32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;

def : Pat<(vselect VK8WM:$mask,
                   (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
                   (v8f64 immAllZerosV)),
          (VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv8i32 addr:$src)))),
                   (v8i64 immAllZerosV)),
          (VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv8i32 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}

let Predicates = [HasVLX] in {
defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
                           v8i32x_info, v4i32x_info>,
                           EVEX_V256, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
                           v8f32x_info, v4f32x_info>,
                           EVEX_V256, EVEX_CD8<32, CD8VT4>;

def : Pat<(v4f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
          (VBROADCASTF32X4Z256rm addr:$src)>;
def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
          (VBROADCASTI32X4Z256rm addr:$src)>;
def : Pat<(v16i16 (X86SubVBroadcast (loadv8i16 addr:$src))),
          (VBROADCASTI32X4Z256rm addr:$src)>;
def : Pat<(v32i8 (X86SubVBroadcast (loadv16i8 addr:$src))),
          (VBROADCASTI32X4Z256rm addr:$src)>;

// Patterns for selects of bitcasted operations.
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
                   (v8f32 immAllZerosV)),
          (VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
                   VR256X:$src0),
          (VBROADCASTF32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
                   (v8i32 immAllZerosV)),
          (VBROADCASTI32X4Z256rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
                   VR256X:$src0),
          (VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
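// In the selects above, the X86SubVBroadcast node carries the type of the
// 64-bit-element load (e.g. v4i64), while the 32x4 instruction masks per
// dword; the bc_* bitcasts reconcile the node's type with the 32-bit
// granularity of the write-mask.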
// Provide a fallback in case the load node used in the patterns above has
// additional users, which prevents the patterns from being selected.
def : Pat<(v4f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
          (VINSERTF32x4Z256rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
           (v2f64 VR128X:$src), 1)>;
def : Pat<(v8f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
          (VINSERTF32x4Z256rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
           (v4f32 VR128X:$src), 1)>;
def : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
          (VINSERTI32x4Z256rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
           (v2i64 VR128X:$src), 1)>;
def : Pat<(v8i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
          (VINSERTI32x4Z256rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
           (v4i32 VR128X:$src), 1)>;
def : Pat<(v16i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
          (VINSERTI32x4Z256rr (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
           (v8i16 VR128X:$src), 1)>;
def : Pat<(v32i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
          (VINSERTI32x4Z256rr (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
           (v16i8 VR128X:$src), 1)>;
}

let Predicates = [HasVLX, HasDQI] in {
defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
                           v4i64x_info, v2i64x_info>, VEX_W1X,
                           EVEX_V256, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
                           v4f64x_info, v2f64x_info>, VEX_W1X,
                           EVEX_V256, EVEX_CD8<64, CD8VT2>;

// Patterns for selects of bitcasted operations.
def : Pat<(vselect VK4WM:$mask,
                   (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
                   (v4f64 immAllZerosV)),
          (VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>;
def : Pat<(vselect VK4WM:$mask,
                   (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
                   VR256X:$src0),
          (VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
def : Pat<(vselect VK4WM:$mask,
                   (bc_v4i64 (v8i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
                   (v4i64 immAllZerosV)),
          (VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>;
def : Pat<(vselect VK4WM:$mask,
                   (bc_v4i64 (v8i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
                   VR256X:$src0),
          (VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
}

let Predicates = [HasDQI] in {
defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
                       v8i64_info, v2i64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
                       v16i32_info, v8i32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT8>;
defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
                       v8f64_info, v2f64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
                       v16f32_info, v8f32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT8>;
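// As with the VLX variants above, the DQ forms only provide masked patterns;
// an unmasked 64x2 or 32x8 broadcast keeps selecting the AVX512F
// instructions. The selects below exist because a masked broadcast must use
// the instruction whose element size matches the mask granularity.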
// Patterns for selects of bitcasted operations.
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
                   (v16f32 immAllZerosV)),
          (VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTF32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))),
                   (v16i32 immAllZerosV)),
          (VBROADCASTI32X8rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTI32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;

def : Pat<(vselect VK8WM:$mask,
                   (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
                   (v8f64 immAllZerosV)),
          (VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
                   (v8i64 immAllZerosV)),
          (VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}

multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
                                        AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> {
  let Predicates = [HasDQI] in
    defm Z : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256,
                                       WriteShuffle256Ld, _Dst.info512,
                                       _Src.info512, _Src.info128, 0, null_frag, null_frag>,
             EVEX_V512;
  let Predicates = [HasDQI, HasVLX] in
    defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256,
                                          WriteShuffle256Ld, _Dst.info256,
                                          _Src.info256, _Src.info128, 0, null_frag, null_frag>,
                EVEX_V256;
}

multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
                                         AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> :
  avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> {

  let Predicates = [HasDQI, HasVLX] in
    defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle,
                                          WriteShuffleXLd, _Dst.info128,
                                          _Src.info128, _Src.info128, 0, null_frag, null_frag>,
                EVEX_V128;
}

defm VBROADCASTI32X2 : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
                                                     avx512vl_i32_info, avx512vl_i64_info>;
defm VBROADCASTF32X2 : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
                                                    avx512vl_f32_info, avx512vl_f64_info>;

//===----------------------------------------------------------------------===//
// AVX-512 BROADCAST MASK TO VECTOR REGISTER
//---
multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr,
                                  X86VectorVTInfo _, RegisterClass KRC> {
  def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.RC:$dst), (ins KRC:$src),
                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                      [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))]>,
                      EVEX, Sched<[WriteShuffle]>;
}

multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
                                 AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> {
  let Predicates = [HasCDI] in
    defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>, EVEX_V512;
  let Predicates = [HasCDI, HasVLX] in {
    defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>, EVEX_V256;
    defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>, EVEX_V128;
  }
}
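// For reference: vpbroadcastmw2d copies the 16-bit mask register,
// zero-extended to 32 bits, into every dword element of the destination;
// vpbroadcastmb2q does the same for an 8-bit mask into qword elements.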
defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
                                             avx512vl_i32_info, VK16>;
defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
                                             avx512vl_i64_info, VK8>, VEX_W;

//===----------------------------------------------------------------------===//
// -- VPERMI2 - 3 source operands form --
multiclass avx512_perm_i<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched,
                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
    hasSideEffects = 0 in {
  defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst),
           (ins _.RC:$src2, _.RC:$src3),
           OpcodeStr, "$src3, $src2", "$src2, $src3",
           (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, _.RC:$src3)), 1>,
           EVEX_4V, AVX5128IBase, Sched<[sched]>;

  let mayLoad = 1 in
  defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
           (ins _.RC:$src2, _.MemOp:$src3),
           OpcodeStr, "$src3, $src2", "$src2, $src3",
           (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1,
                  (_.VT (_.LdFrag addr:$src3)))), 1>,
           EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched,
                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
      hasSideEffects = 0, mayLoad = 1 in
  defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
            (ins _.RC:$src2, _.ScalarMemOp:$src3),
            OpcodeStr, !strconcat("${src3}", _.BroadcastStr, ", $src2"),
            !strconcat("$src2, ${src3}", _.BroadcastStr),
            (_.VT (X86VPermt2 _.RC:$src2,
                   IdxVT.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
            AVX5128IBase, EVEX_4V, EVEX_B,
            Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr,
                               X86FoldableSchedWrite sched,
                               AVX512VLVectorVTInfo VTInfo,
                               AVX512VLVectorVTInfo ShuffleMask> {
  defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
                           ShuffleMask.info512>,
             avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info512,
                              ShuffleMask.info512>, EVEX_V512;
  let Predicates = [HasVLX] in {
    defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
                                 ShuffleMask.info128>,
                   avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info128,
                                    ShuffleMask.info128>, EVEX_V128;
    defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
                                 ShuffleMask.info256>,
                   avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info256,
                                    ShuffleMask.info256>, EVEX_V256;
  }
}
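// In avx512_perm_i above, $src1 is the index vector and it is tied to $dst:
// a vpermi2* instruction overwrites its index operand with the permuted
// result, which is also why the masked forms use the index as the
// pass-through value.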
multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched,
                                  AVX512VLVectorVTInfo VTInfo,
                                  AVX512VLVectorVTInfo Idx,
                                  Predicate Prd> {
  let Predicates = [Prd] in
    defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
                             Idx.info512>, EVEX_V512;
  let Predicates = [Prd, HasVLX] in {
    defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
                                 Idx.info128>, EVEX_V128;
    defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
                                 Idx.info256>, EVEX_V256;
  }
}

defm VPERMI2D : avx512_perm_i_sizes<0x76, "vpermi2d", WriteVarShuffle256,
                  avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMI2Q : avx512_perm_i_sizes<0x76, "vpermi2q", WriteVarShuffle256,
                  avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMI2W : avx512_perm_i_sizes_bw<0x75, "vpermi2w", WriteVarShuffle256,
                  avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
                  VEX_W, EVEX_CD8<16, CD8VF>;
defm VPERMI2B : avx512_perm_i_sizes_bw<0x75, "vpermi2b", WriteVarShuffle256,
                  avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
                  EVEX_CD8<8, CD8VF>;
defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", WriteFVarShuffle256,
                   avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", WriteFVarShuffle256,
                   avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;

// Extra patterns to deal with extra bitcasts due to passthru and index being
// different types on the fp versions.
multiclass avx512_perm_i_lowering<string InstrStr, X86VectorVTInfo _,
                                  X86VectorVTInfo IdxVT,
                                  X86VectorVTInfo CastVT> {
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (X86VPermt2 (_.VT _.RC:$src2),
                    (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))), _.RC:$src3),
                   (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
            (!cast<Instruction>(InstrStr#"rrk") _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, _.RC:$src3)>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (X86VPermt2 _.RC:$src2,
                    (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))),
                    (_.LdFrag addr:$src3)),
                   (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
            (!cast<Instruction>(InstrStr#"rmk") _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3)>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (X86VPermt2 _.RC:$src2,
                    (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))),
                    (_.BroadcastLdFrag addr:$src3)),
                   (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
            (!cast<Instruction>(InstrStr#"rmbk") _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3)>;
}
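// For example, when a v16i32 index vector was originally built as v8i64,
// the DAG shows the VPERMI2PS pass-through as (v16f32 (bitconvert X)) and
// the index as (v16i32 (bitconvert X)); the patterns above recognize that
// both refer to the same register and still select the masked
// rrk/rmk/rmbk forms.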
// TODO: Should we add more casts? The vXi64 case is common due to ABI.
defm : avx512_perm_i_lowering<"VPERMI2PS", v16f32_info, v16i32_info, v8i64_info>;
defm : avx512_perm_i_lowering<"VPERMI2PS256", v8f32x_info, v8i32x_info, v4i64x_info>;
defm : avx512_perm_i_lowering<"VPERMI2PS128", v4f32x_info, v4i32x_info, v2i64x_info>;

// VPERMT2
multiclass avx512_perm_t<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched,
                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
  defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
           (ins IdxVT.RC:$src2, _.RC:$src3),
           OpcodeStr, "$src3, $src2", "$src2, $src3",
           (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)), 1>,
           EVEX_4V, AVX5128IBase, Sched<[sched]>;

  defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
           (ins IdxVT.RC:$src2, _.MemOp:$src3),
           OpcodeStr, "$src3, $src2", "$src2, $src3",
           (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
                  (_.LdFrag addr:$src3))), 1>,
           EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched,
                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
  defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
            (ins IdxVT.RC:$src2, _.ScalarMemOp:$src3),
            OpcodeStr, !strconcat("${src3}", _.BroadcastStr, ", $src2"),
            !strconcat("$src2, ${src3}", _.BroadcastStr),
            (_.VT (X86VPermt2 _.RC:$src1,
                   IdxVT.RC:$src2, (_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
            AVX5128IBase, EVEX_4V, EVEX_B,
            Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr,
                               X86FoldableSchedWrite sched,
                               AVX512VLVectorVTInfo VTInfo,
                               AVX512VLVectorVTInfo ShuffleMask> {
  defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
                           ShuffleMask.info512>,
             avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info512,
                              ShuffleMask.info512>, EVEX_V512;
  let Predicates = [HasVLX] in {
    defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
                                 ShuffleMask.info128>,
                   avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info128,
                                    ShuffleMask.info128>, EVEX_V128;
    defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
                                 ShuffleMask.info256>,
                   avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info256,
                                    ShuffleMask.info256>, EVEX_V256;
  }
}

multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched,
                                  AVX512VLVectorVTInfo VTInfo,
                                  AVX512VLVectorVTInfo Idx, Predicate Prd> {
  let Predicates = [Prd] in
    defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
                             Idx.info512>, EVEX_V512;
  let Predicates = [Prd, HasVLX] in {
    defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
                                 Idx.info128>, EVEX_V128;
    defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
                                 Idx.info256>, EVEX_V256;
  }
}
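// Both families select the same X86VPermt2 node; the difference is which
// operand gets clobbered. vpermt2* ties the first table operand to $dst,
// while vpermi2* (above) ties the index, which lets later passes pick the
// variant whose tied operand happens to be dead.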
defm VPERMT2D : avx512_perm_t_sizes<0x7E, "vpermt2d", WriteVarShuffle256,
                  avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMT2Q : avx512_perm_t_sizes<0x7E, "vpermt2q", WriteVarShuffle256,
                  avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMT2W : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", WriteVarShuffle256,
                  avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
                  VEX_W, EVEX_CD8<16, CD8VF>;
defm VPERMT2B : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", WriteVarShuffle256,
                  avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
                  EVEX_CD8<8, CD8VF>;
defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", WriteFVarShuffle256,
                   avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", WriteFVarShuffle256,
                   avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;

//===----------------------------------------------------------------------===//
// AVX-512 - BLEND using mask
//

multiclass WriteFVarBlendask<bits<8> opc, string OpcodeStr,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
           (ins _.RC:$src1, _.RC:$src2),
           !strconcat(OpcodeStr,
                      "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), []>,
           EVEX_4V, Sched<[sched]>;
  def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
            (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
            !strconcat(OpcodeStr,
                       "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
            []>, EVEX_4V, EVEX_K, Sched<[sched]>;
  def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
             (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
             !strconcat(OpcodeStr,
                        "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
             []>, EVEX_4V, EVEX_KZ, Sched<[sched]>, NotMemoryFoldable;
  let mayLoad = 1 in {
  def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
           (ins _.RC:$src1, _.MemOp:$src2),
           !strconcat(OpcodeStr,
                      "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
           []>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
           Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
            (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
            !strconcat(OpcodeStr,
                       "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
            []>, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>,
            Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
             (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
             !strconcat(OpcodeStr,
                        "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
             []>, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
  }
  }
}
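// Semantics reminder: vblendm* writes src2's element where the mask bit is
// set and src1's element where it is clear (or zero, with {z} masking).
// Note the definitions above deliberately carry empty ISel patterns;
// vselect-based selection of blends is handled by patterns elsewhere.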
multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in {
  def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
             (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
             !strconcat(OpcodeStr,
                        "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
                        "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
             EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[sched.Folded, sched.ReadAfterFold]>;

  def rmbkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
              (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
              !strconcat(OpcodeStr,
                         "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}} {z}|",
                         "$dst {${mask}} {z}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
              EVEX_4V, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
              Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;

  def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
            (ins _.RC:$src1, _.ScalarMemOp:$src2),
            !strconcat(OpcodeStr,
                       "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
                       "$dst, $src1, ${src2}", _.BroadcastStr, "}"), []>,
            EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
            Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass blendmask_dq<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
                        AVX512VLVectorVTInfo VTInfo> {
  defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
           WriteFVarBlendask_rmb<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
           EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
                WriteFVarBlendask_rmb<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
                WriteFVarBlendask_rmb<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
                EVEX_V128;
  }
}

multiclass blendmask_bw<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
                        AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasBWI] in
    defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
             EVEX_V512;

  let Predicates = [HasBWI, HasVLX] in {
    defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
                EVEX_V128;
  }
}

defm VBLENDMPS : blendmask_dq<0x65, "vblendmps", SchedWriteFVarBlend,
                              avx512vl_f32_info>;
defm VBLENDMPD : blendmask_dq<0x65, "vblendmpd", SchedWriteFVarBlend,
                              avx512vl_f64_info>, VEX_W;
defm VPBLENDMD : blendmask_dq<0x64, "vpblendmd", SchedWriteVarBlend,
                              avx512vl_i32_info>;
defm VPBLENDMQ : blendmask_dq<0x64, "vpblendmq", SchedWriteVarBlend,
                              avx512vl_i64_info>, VEX_W;
defm VPBLENDMB : blendmask_bw<0x66, "vpblendmb", SchedWriteVarBlend,
                              avx512vl_i8_info>;
defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", SchedWriteVarBlend,
                              avx512vl_i16_info>, VEX_W;

//===----------------------------------------------------------------------===//
// Compare Instructions
//===----------------------------------------------------------------------===//

// avx512_cmp_scalar - AVX512 CMPSS and CMPSD

multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
                             PatFrag OpNode_su, PatFrag OpNodeSAE_su,
                             X86FoldableSchedWrite sched> {
  defm rr_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                (outs _.KRC:$dst),
                (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                "vcmp"#_.Suffix,
                "$cc, $src2, $src1", "$src1, $src2, $cc",
                (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
                (OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                           timm:$cc)>, EVEX_4V, VEX_LIG, Sched<[sched]>, SIMD_EXC;
  let mayLoad = 1 in
  defm rm_Int : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
                (outs _.KRC:$dst),
                (ins _.RC:$src1, _.IntScalarMemOp:$src2, u8imm:$cc),
                "vcmp"#_.Suffix,
                "$cc, $src2, $src1", "$src1, $src2, $cc",
                (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
                        timm:$cc),
                (OpNode_su (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
                           timm:$cc)>, EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
                Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  let Uses = [MXCSR] in
  defm rrb_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                 (outs _.KRC:$dst),
                 (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                 "vcmp"#_.Suffix,
                 "$cc, {sae}, $src2, $src1", "$src1, $src2, {sae}, $cc",
                 (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                            timm:$cc),
                 (OpNodeSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                               timm:$cc)>,
                 EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;

  let isCodeGenOnly = 1 in {
    let isCommutable = 1 in
    def rr : AVX512Ii8<0xC2, MRMSrcReg,
             (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, u8imm:$cc),
             !strconcat("vcmp", _.Suffix,
                        "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
             [(set _.KRC:$dst, (OpNode _.FRC:$src1,
                                       _.FRC:$src2,
                                       timm:$cc))]>,
             EVEX_4V, VEX_LIG, Sched<[sched]>, SIMD_EXC;
    def rm : AVX512Ii8<0xC2, MRMSrcMem,
             (outs _.KRC:$dst),
             (ins _.FRC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
             !strconcat("vcmp", _.Suffix,
                        "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
             [(set _.KRC:$dst, (OpNode _.FRC:$src1,
                                       (_.ScalarLdFrag addr:$src2),
                                       timm:$cc))]>,
             EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
             Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  }
}

def X86cmpms_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                          (X86cmpms node:$src1, node:$src2, node:$cc), [{
  return N->hasOneUse();
}]>;
def X86cmpmsSAE_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                             (X86cmpmsSAE node:$src1, node:$src2, node:$cc), [{
  return N->hasOneUse();
}]>;

let Predicates = [HasAVX512] in {
  let ExeDomain = SSEPackedSingle in
  defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsSAE,
                                   X86cmpms_su, X86cmpmsSAE_su,
                                   SchedWriteFCmp.Scl>, AVX512XSIi8Base;
  let ExeDomain = SSEPackedDouble in
  defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsSAE,
                                   X86cmpms_su, X86cmpmsSAE_su,
                                   SchedWriteFCmp.Scl>, AVX512XDIi8Base, VEX_W;
}

multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr,
                              X86FoldableSchedWrite sched,
                              X86VectorVTInfo _, bit IsCommutable> {
  let isCommutable = IsCommutable, hasSideEffects = 0 in
  def rr : AVX512BI<opc, MRMSrcReg,
           (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
           []>, EVEX_4V, Sched<[sched]>;
  let mayLoad = 1, hasSideEffects = 0 in
  def rm : AVX512BI<opc, MRMSrcMem,
           (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
           []>, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  let isCommutable = IsCommutable, hasSideEffects = 0 in
  def rrk : AVX512BI<opc, MRMSrcReg,
            (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
            !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
                       "$dst {${mask}}, $src1, $src2}"),
            []>, EVEX_4V, EVEX_K, Sched<[sched]>;
  let mayLoad = 1, hasSideEffects = 0 in
  def rmk : AVX512BI<opc, MRMSrcMem,
            (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
            !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
                       "$dst {${mask}}, $src1, $src2}"),
            []>, EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                  bit IsCommutable> :
  avx512_icmp_packed<opc, OpcodeStr, sched, _, IsCommutable> {
  let mayLoad = 1, hasSideEffects = 0 in {
  def rmb : AVX512BI<opc, MRMSrcMem,
            (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
            !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
                       "|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
            []>, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmbk : AVX512BI<opc, MRMSrcMem,
             (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
                                     _.ScalarMemOp:$src2),
             !strconcat(OpcodeStr,
                        "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
                        "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
             []>, EVEX_4V, EVEX_K, EVEX_B,
             Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr,
                                 X86SchedWriteWidths sched,
                                 AVX512VLVectorVTInfo VTInfo, Predicate prd,
                                 bit IsCommutable = 0> {
  let Predicates = [prd] in
    defm Z : avx512_icmp_packed<opc, OpcodeStr, sched.ZMM,
                                VTInfo.info512, IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_packed<opc, OpcodeStr, sched.YMM,
                                   VTInfo.info256, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_icmp_packed<opc, OpcodeStr, sched.XMM,
                                   VTInfo.info128, IsCommutable>, EVEX_V128;
  }
}

multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
                                     X86SchedWriteWidths sched,
                                     AVX512VLVectorVTInfo VTInfo,
                                     Predicate prd, bit IsCommutable = 0> {
  let Predicates = [prd] in
    defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.ZMM,
                                    VTInfo.info512, IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.YMM,
                                       VTInfo.info256, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.XMM,
                                       VTInfo.info128, IsCommutable>, EVEX_V128;
  }
}

// This fragment treats X86cmpm as commutable to help match loads in both
// operands for PCMPEQ.
def X86setcc_commute : SDNode<"ISD::SETCC", SDTSetCC, [SDNPCommutative]>;
def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2),
                         (setcc node:$src1, node:$src2, SETGT)>;

// AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
// increase the pattern complexity the way an immediate would.
let AddedComplexity = 2 in {
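// Marking the SETCC node commutative lets patterns elsewhere in this file
// match a load on either side of an equality compare, e.g.
// (setcc (load p), x, SETEQ), without a separate commuted pattern; SETGT is
// not symmetric, so the VPCMPGT* defs below omit the IsCommutable bit.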
// FIXME: Is there a better scheduler class for VPCMP?
defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb",
                SchedWriteVecALU, avx512vl_i8_info, HasBWI, 1>,
                EVEX_CD8<8, CD8VF>, VEX_WIG;

defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw",
                SchedWriteVecALU, avx512vl_i16_info, HasBWI, 1>,
                EVEX_CD8<16, CD8VF>, VEX_WIG;

defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd",
                SchedWriteVecALU, avx512vl_i32_info, HasAVX512, 1>,
                EVEX_CD8<32, CD8VF>;

defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq",
                SchedWriteVecALU, avx512vl_i64_info, HasAVX512, 1>,
                T8PD, VEX_W, EVEX_CD8<64, CD8VF>;

defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb",
                SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
                EVEX_CD8<8, CD8VF>, VEX_WIG;

defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw",
                SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
                EVEX_CD8<16, CD8VF>, VEX_WIG;

defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd",
                SchedWriteVecALU, avx512vl_i32_info, HasAVX512>,
                EVEX_CD8<32, CD8VF>;

defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq",
                SchedWriteVecALU, avx512vl_i64_info, HasAVX512>,
                T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
}

multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
                          PatFrag Frag_su, PatFrag CommFrag, PatFrag CommFrag_su,
                          X86FoldableSchedWrite sched,
                          X86VectorVTInfo _, string Name> {
  let isCommutable = 1 in
  def rri : AVX512AIi8<opc, MRMSrcReg,
            (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
            !strconcat("vpcmp", Suffix,
                       "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
            [(set _.KRC:$dst, (_.KVT (Frag:$cc (_.VT _.RC:$src1),
                                               (_.VT _.RC:$src2),
                                               cond)))]>,
            EVEX_4V, Sched<[sched]>;
  def rmi : AVX512AIi8<opc, MRMSrcMem,
            (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
            !strconcat("vpcmp", Suffix,
                       "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
            [(set _.KRC:$dst, (_.KVT
                               (Frag:$cc
                                (_.VT _.RC:$src1),
                                (_.VT (_.LdFrag addr:$src2)),
                                cond)))]>,
            EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  let isCommutable = 1 in
  def rrik : AVX512AIi8<opc, MRMSrcReg,
             (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
                                     u8imm:$cc),
             !strconcat("vpcmp", Suffix,
                        "\t{$cc, $src2, $src1, $dst {${mask}}|",
                        "$dst {${mask}}, $src1, $src2, $cc}"),
             [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                    (_.KVT (Frag_su:$cc (_.VT _.RC:$src1),
                                                        (_.VT _.RC:$src2),
                                                        cond))))]>,
             EVEX_4V, EVEX_K, Sched<[sched]>;
  def rmik : AVX512AIi8<opc, MRMSrcMem,
             (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
                                     u8imm:$cc),
             !strconcat("vpcmp", Suffix,
                        "\t{$cc, $src2, $src1, $dst {${mask}}|",
                        "$dst {${mask}}, $src1, $src2, $cc}"),
             [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                    (_.KVT
                                     (Frag_su:$cc
                                      (_.VT _.RC:$src1),
                                      (_.VT (_.LdFrag addr:$src2)),
                                      cond))))]>,
             EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
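  // Commuted-load patterns: rewrite the condition so a load in the first
  // operand can still use the rmi/rmik memory forms.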
  def : Pat<(_.KVT (CommFrag:$cc (_.LdFrag addr:$src2),
                                 (_.VT _.RC:$src1), cond)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmi")
             _.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>;

  def : Pat<(and _.KRCWM:$mask,
                 (_.KVT (CommFrag_su:$cc (_.LdFrag addr:$src2),
                                         (_.VT _.RC:$src1), cond))),
            (!cast<Instruction>(Name#_.ZSuffix#"rmik")
             _.KRCWM:$mask, _.RC:$src1, addr:$src2,
             (CommFrag.OperandTransform $cc))>;
}

multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
                              PatFrag Frag_su, PatFrag CommFrag,
                              PatFrag CommFrag_su, X86FoldableSchedWrite sched,
                              X86VectorVTInfo _, string Name> :
  avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
                 sched, _, Name> {
  def rmib : AVX512AIi8<opc, MRMSrcMem,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
                                     u8imm:$cc),
             !strconcat("vpcmp", Suffix,
                        "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
                        "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
             [(set _.KRC:$dst, (_.KVT (Frag:$cc
                                       (_.VT _.RC:$src1),
                                       (_.BroadcastLdFrag addr:$src2),
                                       cond)))]>,
             EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmibk : AVX512AIi8<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
                                      _.ScalarMemOp:$src2, u8imm:$cc),
              !strconcat("vpcmp", Suffix,
                         "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
                         "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
              [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                     (_.KVT (Frag_su:$cc
                                             (_.VT _.RC:$src1),
                                             (_.BroadcastLdFrag addr:$src2),
                                             cond))))]>,
              EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;

  def : Pat<(_.KVT (CommFrag:$cc (_.BroadcastLdFrag addr:$src2),
                                 (_.VT _.RC:$src1), cond)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmib")
             _.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>;

  def : Pat<(and _.KRCWM:$mask,
                 (_.KVT (CommFrag_su:$cc (_.BroadcastLdFrag addr:$src2),
                                         (_.VT _.RC:$src1), cond))),
            (!cast<Instruction>(Name#_.ZSuffix#"rmibk")
             _.KRCWM:$mask, _.RC:$src1, addr:$src2,
             (CommFrag_su.OperandTransform $cc))>;
}

multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, PatFrag Frag,
                             PatFrag Frag_su, PatFrag CommFrag,
                             PatFrag CommFrag_su, X86SchedWriteWidths sched,
                             AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
                            sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
                               sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
                               sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
  }
}

multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, PatFrag Frag,
                                 PatFrag Frag_su, PatFrag CommFrag,
                                 PatFrag CommFrag_su, X86SchedWriteWidths sched,
                                 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
                                sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
                                   sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
                                   sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
  }
}

def X86pcmpm_imm : SDNodeXForm<setcc, [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  uint8_t SSECC = X86::getVPCMPImmForCond(CC);
  return getI8Imm(SSECC, SDLoc(N));
}]>;
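// For reference, the VPCMP immediate encodes the predicate as 0=EQ, 1=LT,
// 2=LE, 4=NE, 5=NLT (GE), 6=NLE (GT); 3 and 7 are the trivial always-false
// and always-true predicates. Swapping the operands therefore maps 1<->6
// and 2<->5 while leaving EQ/NE unchanged, which is what
// X86::getSwappedVPCMPImm computes for the commuted patterns below.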
// Swapped operand version of the above.
def X86pcmpm_imm_commute : SDNodeXForm<setcc, [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  uint8_t SSECC = X86::getVPCMPImmForCond(CC);
  SSECC = X86::getSwappedVPCMPImm(SSECC);
  return getI8Imm(SSECC, SDLoc(N));
}]>;

def X86pcmpm : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                       (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return !ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm>;

def X86pcmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                          (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return N->hasOneUse() && !ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm>;

// Same as above, but commutes the immediate. Used for load folding.
def X86pcmpm_commute : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                               (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return !ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm_commute>;

def X86pcmpm_commute_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                                  (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return N->hasOneUse() && !ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm_commute>;

def X86pcmpum : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                        (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm>;

def X86pcmpum_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                           (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return N->hasOneUse() && ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm>;

// Same as above, but commutes the immediate. Used for load folding.
def X86pcmpum_commute : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                                (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm_commute>;

def X86pcmpum_commute_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                                   (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return N->hasOneUse() && ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm_commute>;
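// The _su ("single use") variants additionally require hasOneUse: the masked
// forms fold the compare into an AND with the write-mask, and folding a
// compare that has other users would force it to be computed twice.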
// FIXME: Is there a better scheduler class for VPCMP/VPCMPU?
defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86pcmpm, X86pcmpm_su,
                                X86pcmpm_commute, X86pcmpm_commute_su,
                                SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
                                EVEX_CD8<8, CD8VF>;
defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86pcmpum, X86pcmpum_su,
                                 X86pcmpum_commute, X86pcmpum_commute_su,
                                 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
                                 EVEX_CD8<8, CD8VF>;

defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86pcmpm, X86pcmpm_su,
                                X86pcmpm_commute, X86pcmpm_commute_su,
                                SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
                                VEX_W, EVEX_CD8<16, CD8VF>;
defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86pcmpum, X86pcmpum_su,
                                 X86pcmpum_commute, X86pcmpum_commute_su,
                                 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
                                 VEX_W, EVEX_CD8<16, CD8VF>;

defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86pcmpm, X86pcmpm_su,
                                    X86pcmpm_commute, X86pcmpm_commute_su,
                                    SchedWriteVecALU, avx512vl_i32_info,
                                    HasAVX512>, EVEX_CD8<32, CD8VF>;
defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86pcmpum, X86pcmpum_su,
                                     X86pcmpum_commute, X86pcmpum_commute_su,
                                     SchedWriteVecALU, avx512vl_i32_info,
                                     HasAVX512>, EVEX_CD8<32, CD8VF>;

defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86pcmpm, X86pcmpm_su,
                                    X86pcmpm_commute, X86pcmpm_commute_su,
                                    SchedWriteVecALU, avx512vl_i64_info,
                                    HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86pcmpum, X86pcmpum_su,
                                     X86pcmpum_commute, X86pcmpum_commute_su,
                                     SchedWriteVecALU, avx512vl_i64_info,
                                     HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;

def X86cmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                         (X86cmpm node:$src1, node:$src2, node:$cc), [{
  return N->hasOneUse();
}]>;
def X86cmpmSAE_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                            (X86cmpmSAE node:$src1, node:$src2, node:$cc), [{
  return N->hasOneUse();
}]>;

def X86cmpm_imm_commute : SDNodeXForm<timm, [{
  uint8_t Imm = X86::getSwappedVCMPImm(N->getZExtValue() & 0x1f);
  return getI8Imm(Imm, SDLoc(N));
}]>;

multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
                              string Name> {
let Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
             "vcmp"#_.Suffix,
             "$cc, $src2, $src1", "$src1, $src2, $cc",
             (X86any_cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
             (X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
             1>, Sched<[sched]>;

  defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
             "vcmp"#_.Suffix,
             "$cc, $src2, $src1", "$src1, $src2, $cc",
             (X86any_cmpm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
                          timm:$cc),
             (X86cmpm_su (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
                         timm:$cc)>,
             Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
              (outs _.KRC:$dst),
              (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
              "vcmp"#_.Suffix,
              "$cc, ${src2}"#_.BroadcastStr#", $src1",
              "$src1, ${src2}"#_.BroadcastStr#", $cc",
              (X86any_cmpm (_.VT _.RC:$src1),
                           (_.VT (_.BroadcastLdFrag addr:$src2)),
                           timm:$cc),
              (X86cmpm_su (_.VT _.RC:$src1),
                          (_.VT (_.BroadcastLdFrag addr:$src2)),
                          timm:$cc)>,
              EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
  // Patterns for selecting with a load in the other operand.
  def : Pat<(X86any_cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1),
                         timm:$cc),
            (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
             (X86cmpm_imm_commute timm:$cc))>;

  def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.LdFrag addr:$src2),
                                            (_.VT _.RC:$src1),
                                            timm:$cc)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
             _.RC:$src1, addr:$src2,
             (X86cmpm_imm_commute timm:$cc))>;

  def : Pat<(X86any_cmpm (_.BroadcastLdFrag addr:$src2),
                         (_.VT _.RC:$src1), timm:$cc),
            (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
             (X86cmpm_imm_commute timm:$cc))>;

  def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.BroadcastLdFrag addr:$src2),
                                            (_.VT _.RC:$src1),
                                            timm:$cc)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
             _.RC:$src1, addr:$src2,
             (X86cmpm_imm_commute timm:$cc))>;
}

multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  // Comparison code form (VCMP[EQ/LT/LE/...]).
  let Uses = [MXCSR] in
  defm rrib : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
              (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
              "vcmp"#_.Suffix,
              "$cc, {sae}, $src2, $src1",
              "$src1, $src2, {sae}, $cc",
              (X86cmpmSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
              (X86cmpmSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                             timm:$cc)>,
              EVEX_B, Sched<[sched]>;
}

multiclass avx512_vcmp<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcmp_common<sched.ZMM, _.info512, NAME>,
             avx512_vcmp_sae<sched.ZMM, _.info512>, EVEX_V512;
  }
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_vcmp_common<sched.XMM, _.info128, NAME>, EVEX_V128;
    defm Z256 : avx512_vcmp_common<sched.YMM, _.info256, NAME>, EVEX_V256;
  }
}

defm VCMPPD : avx512_vcmp<SchedWriteFCmp, avx512vl_f64_info>,
              AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VCMPPS : avx512_vcmp<SchedWriteFCmp, avx512vl_f32_info>,
              AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
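// For example, (X86any_cmpm (load p), x, LT) cannot use the memory form
// directly because only the second source can come from memory; the
// commuted patterns above instead emit the rmi form with the swapped
// predicate (LT becomes GT), as computed by X86::getSwappedVCMPImm.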
// Patterns to select fp compares with a load as the first operand.
let Predicates = [HasAVX512] in {
  def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1,
                            timm:$cc)),
            (VCMPSDZrm FR64X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;

  def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1,
                            timm:$cc)),
            (VCMPSSZrm FR32X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
}

// ----------------------------------------------------------------
// FPClass

def X86Vfpclasss_su : PatFrag<(ops node:$src1, node:$src2),
                              (X86Vfpclasss node:$src1, node:$src2), [{
  return N->hasOneUse();
}]>;

def X86Vfpclass_su : PatFrag<(ops node:$src1, node:$src2),
                             (X86Vfpclass node:$src1, node:$src2), [{
  return N->hasOneUse();
}]>;

// Handle the fpclass instruction: mask = op(reg_scalar, imm)
//                                        op(mem_scalar, imm)
multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                 Predicate prd> {
  let Predicates = [prd], ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
    def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
             (ins _.RC:$src1, i32u8imm:$src2),
             OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
             [(set _.KRC:$dst,(X86Vfpclasss (_.VT _.RC:$src1),
                                            (i32 timm:$src2)))]>,
             Sched<[sched]>;
    def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
              (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
              OpcodeStr##_.Suffix#
              "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
              [(set _.KRC:$dst,(and _.KRCWM:$mask,
                                    (X86Vfpclasss_su (_.VT _.RC:$src1),
                                                     (i32 timm:$src2))))]>,
              EVEX_K, Sched<[sched]>;
    def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
             (ins _.IntScalarMemOp:$src1, i32u8imm:$src2),
             OpcodeStr##_.Suffix##
             "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
             [(set _.KRC:$dst,
                   (X86Vfpclasss _.ScalarIntMemCPat:$src1,
                                 (i32 timm:$src2)))]>,
             Sched<[sched.Folded, sched.ReadAfterFold]>;
    def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
              (ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2),
              OpcodeStr##_.Suffix##
              "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
              [(set _.KRC:$dst,(and _.KRCWM:$mask,
                                    (X86Vfpclasss_su _.ScalarIntMemCPat:$src1,
                                                     (i32 timm:$src2))))]>,
              EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Handle the fpclass instruction: mask = fpclass(reg_vec, reg_vec, imm)
//                                        fpclass(reg_vec, mem_vec, imm)
//                                        fpclass(reg_vec, broadcast(eltVt), imm)
multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                 string mem> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
    def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
             (ins _.RC:$src1, i32u8imm:$src2),
             OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
             [(set _.KRC:$dst,(X86Vfpclass (_.VT _.RC:$src1),
                                           (i32 timm:$src2)))]>,
             Sched<[sched]>;
    def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
              (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
              OpcodeStr##_.Suffix#
              "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
              [(set _.KRC:$dst,(and _.KRCWM:$mask,
                                    (X86Vfpclass_su (_.VT _.RC:$src1),
                                                    (i32 timm:$src2))))]>,
              EVEX_K, Sched<[sched]>;
$dst|$dst, $src1, $src2}", 2705 [(set _.KRC:$dst,(X86Vfpclass 2706 (_.VT (_.LdFrag addr:$src1)), 2707 (i32 timm:$src2)))]>, 2708 Sched<[sched.Folded, sched.ReadAfterFold]>; 2709 def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst), 2710 (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2), 2711 OpcodeStr##_.Suffix#"{"#mem#"}"# 2712 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", 2713 [(set _.KRC:$dst, (and _.KRCWM:$mask, (X86Vfpclass_su 2714 (_.VT (_.LdFrag addr:$src1)), 2715 (i32 timm:$src2))))]>, 2716 EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>; 2717 def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst), 2718 (ins _.ScalarMemOp:$src1, i32u8imm:$src2), 2719 OpcodeStr##_.Suffix##"\t{$src2, ${src1}"## 2720 _.BroadcastStr##", $dst|$dst, ${src1}" 2721 ##_.BroadcastStr##", $src2}", 2722 [(set _.KRC:$dst,(X86Vfpclass 2723 (_.VT (_.BroadcastLdFrag addr:$src1)), 2724 (i32 timm:$src2)))]>, 2725 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 2726 def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst), 2727 (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2), 2728 OpcodeStr##_.Suffix##"\t{$src2, ${src1}"## 2729 _.BroadcastStr##", $dst {${mask}}|$dst {${mask}}, ${src1}"## 2730 _.BroadcastStr##", $src2}", 2731 [(set _.KRC:$dst,(and _.KRCWM:$mask, (X86Vfpclass_su 2732 (_.VT (_.BroadcastLdFrag addr:$src1)), 2733 (i32 timm:$src2))))]>, 2734 EVEX_B, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>; 2735 } 2736 2737 // Allow registers or broadcast with the x, y, z suffix we use to disambiguate 2738 // the memory form. 2739 def : InstAlias<OpcodeStr#_.Suffix#mem# 2740 "\t{$src2, $src1, $dst|$dst, $src1, $src2}", 2741 (!cast<Instruction>(NAME#"rr") 2742 _.KRC:$dst, _.RC:$src1, i32u8imm:$src2), 0, "att">; 2743 def : InstAlias<OpcodeStr#_.Suffix#mem# 2744 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", 2745 (!cast<Instruction>(NAME#"rrk") 2746 _.KRC:$dst, _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2), 0, "att">; 2747 def : InstAlias<OpcodeStr#_.Suffix#mem# 2748 "\t{$src2, ${src1}"#_.BroadcastStr#", $dst|$dst, ${src1}"# 2749 _.BroadcastStr#", $src2}", 2750 (!cast<Instruction>(NAME#"rmb") 2751 _.KRC:$dst, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">; 2752 def : InstAlias<OpcodeStr#_.Suffix#mem# 2753 "\t{$src2, ${src1}"#_.BroadcastStr#", $dst {${mask}}|" 2754 "$dst {${mask}}, ${src1}"#_.BroadcastStr#", $src2}", 2755 (!cast<Instruction>(NAME#"rmbk") 2756 _.KRC:$dst, _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">; 2757} 2758 2759multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _, 2760 bits<8> opc, X86SchedWriteWidths sched, 2761 Predicate prd>{ 2762 let Predicates = [prd] in { 2763 defm Z : avx512_vector_fpclass<opc, OpcodeStr, sched.ZMM, 2764 _.info512, "z">, EVEX_V512; 2765 } 2766 let Predicates = [prd, HasVLX] in { 2767 defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, sched.XMM, 2768 _.info128, "x">, EVEX_V128; 2769 defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, sched.YMM, 2770 _.info256, "y">, EVEX_V256; 2771 } 2772} 2773 2774multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec, 2775 bits<8> opcScalar, X86SchedWriteWidths sched, 2776 Predicate prd> { 2777 defm PS : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f32_info, opcVec, 2778 sched, prd>, 2779 EVEX_CD8<32, CD8VF>; 2780 defm PD : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f64_info, opcVec, 2781 sched, prd>, 2782 EVEX_CD8<64, CD8VF> , VEX_W; 2783 defm SSZ : avx512_scalar_fpclass<opcScalar, OpcodeStr, 2784 sched.Scl, f32x_info, prd>, VEX_LIG, 2785 
EVEX_CD8<32, CD8VT1>; 2786 defm SDZ : avx512_scalar_fpclass<opcScalar, OpcodeStr, 2787 sched.Scl, f64x_info, prd>, VEX_LIG, 2788 EVEX_CD8<64, CD8VT1>, VEX_W; 2789} 2790 2791defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, SchedWriteFCmp, 2792 HasDQI>, AVX512AIi8Base, EVEX; 2793 2794//----------------------------------------------------------------- 2795// Mask register copy, including 2796// - copy between mask registers 2797// - load/store mask registers 2798// - copy from GPR to mask register and vice versa 2799// 2800multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk, 2801 string OpcodeStr, RegisterClass KRC, 2802 ValueType vvt, X86MemOperand x86memop> { 2803 let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove] in 2804 def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src), 2805 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>, 2806 Sched<[WriteMove]>; 2807 def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src), 2808 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 2809 [(set KRC:$dst, (vvt (load addr:$src)))]>, 2810 Sched<[WriteLoad]>; 2811 def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src), 2812 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 2813 [(store KRC:$src, addr:$dst)]>, 2814 Sched<[WriteStore]>; 2815} 2816 2817multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk, 2818 string OpcodeStr, 2819 RegisterClass KRC, RegisterClass GRC> { 2820 let hasSideEffects = 0 in { 2821 def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src), 2822 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>, 2823 Sched<[WriteMove]>; 2824 def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src), 2825 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>, 2826 Sched<[WriteMove]>; 2827 } 2828} 2829 2830let Predicates = [HasDQI] in 2831 defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>, 2832 avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>, 2833 VEX, PD; 2834 2835let Predicates = [HasAVX512] in 2836 defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>, 2837 avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>, 2838 VEX, PS; 2839 2840let Predicates = [HasBWI] in { 2841 defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem>, 2842 VEX, PD, VEX_W; 2843 defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>, 2844 VEX, XD; 2845 defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>, 2846 VEX, PS, VEX_W; 2847 defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>, 2848 VEX, XD, VEX_W; 2849} 2850 2851// GR from/to mask register 2852def : Pat<(v16i1 (bitconvert (i16 GR16:$src))), 2853 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>; 2854def : Pat<(i16 (bitconvert (v16i1 VK16:$src))), 2855 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>; 2856 2857def : Pat<(v8i1 (bitconvert (i8 GR8:$src))), 2858 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>; 2859def : Pat<(i8 (bitconvert (v8i1 VK8:$src))), 2860 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit)>; 2861 2862def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))), 2863 (KMOVWrk VK16:$src)>; 2864def : Pat<(i64 (zext (i16 (bitconvert (v16i1 VK16:$src))))), 2865 (SUBREG_TO_REG (i64 0), (KMOVWrk VK16:$src), sub_32bit)>; 2866def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))), 2867 (COPY_TO_REGCLASS VK16:$src, GR32)>; 
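// kmovw/kmovb zero-extend the mask into the full 32-bit GPR, so the zext
// patterns map straight to KMOVWrk/KMOVBrk with no extra masking, while the
// anyext cases need nothing more than a register-class copy.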
2868def : Pat<(i64 (anyext (i16 (bitconvert (v16i1 VK16:$src))))), 2869 (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK16:$src, GR32), sub_32bit)>; 2870 2871def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))), 2872 (KMOVBrk VK8:$src)>, Requires<[HasDQI]>; 2873def : Pat<(i64 (zext (i8 (bitconvert (v8i1 VK8:$src))))), 2874 (SUBREG_TO_REG (i64 0), (KMOVBrk VK8:$src), sub_32bit)>, Requires<[HasDQI]>; 2875def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))), 2876 (COPY_TO_REGCLASS VK8:$src, GR32)>; 2877def : Pat<(i64 (anyext (i8 (bitconvert (v8i1 VK8:$src))))), 2878 (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK8:$src, GR32), sub_32bit)>; 2879 2880def : Pat<(v32i1 (bitconvert (i32 GR32:$src))), 2881 (COPY_TO_REGCLASS GR32:$src, VK32)>; 2882def : Pat<(i32 (bitconvert (v32i1 VK32:$src))), 2883 (COPY_TO_REGCLASS VK32:$src, GR32)>; 2884def : Pat<(v64i1 (bitconvert (i64 GR64:$src))), 2885 (COPY_TO_REGCLASS GR64:$src, VK64)>; 2886def : Pat<(i64 (bitconvert (v64i1 VK64:$src))), 2887 (COPY_TO_REGCLASS VK64:$src, GR64)>; 2888 2889// Load/store kreg 2890let Predicates = [HasDQI] in { 2891 def : Pat<(store VK1:$src, addr:$dst), 2892 (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK8))>; 2893 2894 def : Pat<(v1i1 (load addr:$src)), 2895 (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>; 2896 def : Pat<(v2i1 (load addr:$src)), 2897 (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>; 2898 def : Pat<(v4i1 (load addr:$src)), 2899 (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>; 2900} 2901 2902let Predicates = [HasAVX512] in { 2903 def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))), 2904 (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>; 2905 def : Pat<(v16i1 (bitconvert (loadi16 addr:$src))), 2906 (KMOVWkm addr:$src)>; 2907} 2908 2909def X86kextract : SDNode<"ISD::EXTRACT_VECTOR_ELT", 2910 SDTypeProfile<1, 2, [SDTCisVT<0, i8>, 2911 SDTCVecEltisVT<1, i1>, 2912 SDTCisPtrTy<2>]>>; 2913 2914let Predicates = [HasAVX512] in { 2915 multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC, ValueType maskVT> { 2916 def : Pat<(maskVT (scalar_to_vector GR32:$src)), 2917 (COPY_TO_REGCLASS GR32:$src, maskRC)>; 2918 2919 def : Pat<(maskVT (scalar_to_vector GR8:$src)), 2920 (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>; 2921 2922 def : Pat<(i8 (X86kextract maskRC:$src, (iPTR 0))), 2923 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS maskRC:$src, GR32)), sub_8bit)>; 2924 2925 def : Pat<(i32 (anyext (i8 (X86kextract maskRC:$src, (iPTR 0))))), 2926 (i32 (COPY_TO_REGCLASS maskRC:$src, GR32))>; 2927 } 2928 2929 defm : operation_gpr_mask_copy_lowering<VK1, v1i1>; 2930 defm : operation_gpr_mask_copy_lowering<VK2, v2i1>; 2931 defm : operation_gpr_mask_copy_lowering<VK4, v4i1>; 2932 defm : operation_gpr_mask_copy_lowering<VK8, v8i1>; 2933 defm : operation_gpr_mask_copy_lowering<VK16, v16i1>; 2934 defm : operation_gpr_mask_copy_lowering<VK32, v32i1>; 2935 defm : operation_gpr_mask_copy_lowering<VK64, v64i1>; 2936 2937 def : Pat<(insert_subvector (v16i1 immAllZerosV), 2938 (v1i1 (scalar_to_vector GR8:$src)), (iPTR 0)), 2939 (COPY_TO_REGCLASS 2940 (KMOVWkr (AND32ri8 2941 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), 2942 (i32 1))), VK16)>; 2943} 2944 2945// Mask unary operation 2946// - KNOT 2947multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr, 2948 RegisterClass KRC, SDPatternOperator OpNode, 2949 X86FoldableSchedWrite sched, Predicate prd> { 2950 let Predicates = [prd] in 2951 def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src), 2952 !strconcat(OpcodeStr, 
"\t{$src, $dst|$dst, $src}"), 2953 [(set KRC:$dst, (OpNode KRC:$src))]>, 2954 Sched<[sched]>; 2955} 2956 2957multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr, 2958 SDPatternOperator OpNode, 2959 X86FoldableSchedWrite sched> { 2960 defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode, 2961 sched, HasDQI>, VEX, PD; 2962 defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode, 2963 sched, HasAVX512>, VEX, PS; 2964 defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode, 2965 sched, HasBWI>, VEX, PD, VEX_W; 2966 defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode, 2967 sched, HasBWI>, VEX, PS, VEX_W; 2968} 2969 2970// TODO - do we need a X86SchedWriteWidths::KMASK type? 2971defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SchedWriteVecLogic.XMM>; 2972 2973// KNL does not support KMOVB, 8-bit mask is promoted to 16-bit 2974let Predicates = [HasAVX512, NoDQI] in 2975def : Pat<(vnot VK8:$src), 2976 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>; 2977 2978def : Pat<(vnot VK4:$src), 2979 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>; 2980def : Pat<(vnot VK2:$src), 2981 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>; 2982 2983// Mask binary operation 2984// - KAND, KANDN, KOR, KXNOR, KXOR 2985multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr, 2986 RegisterClass KRC, SDPatternOperator OpNode, 2987 X86FoldableSchedWrite sched, Predicate prd, 2988 bit IsCommutable> { 2989 let Predicates = [prd], isCommutable = IsCommutable in 2990 def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2), 2991 !strconcat(OpcodeStr, 2992 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 2993 [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>, 2994 Sched<[sched]>; 2995} 2996 2997multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr, 2998 SDPatternOperator OpNode, 2999 X86FoldableSchedWrite sched, bit IsCommutable, 3000 Predicate prdW = HasAVX512> { 3001 defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode, 3002 sched, HasDQI, IsCommutable>, VEX_4V, VEX_L, PD; 3003 defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode, 3004 sched, prdW, IsCommutable>, VEX_4V, VEX_L, PS; 3005 defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode, 3006 sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PD; 3007 defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode, 3008 sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PS; 3009} 3010 3011def andn : PatFrag<(ops node:$i0, node:$i1), (and (not node:$i0), node:$i1)>; 3012def xnor : PatFrag<(ops node:$i0, node:$i1), (not (xor node:$i0, node:$i1))>; 3013// These nodes use 'vnot' instead of 'not' to support vectors. 3014def vandn : PatFrag<(ops node:$i0, node:$i1), (and (vnot node:$i0), node:$i1)>; 3015def vxnor : PatFrag<(ops node:$i0, node:$i1), (vnot (xor node:$i0, node:$i1))>; 3016 3017// TODO - do we need a X86SchedWriteWidths::KMASK type? 
3018defm KAND : avx512_mask_binop_all<0x41, "kand", and, SchedWriteVecLogic.XMM, 1>; 3019defm KOR : avx512_mask_binop_all<0x45, "kor", or, SchedWriteVecLogic.XMM, 1>; 3020defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor, SchedWriteVecLogic.XMM, 1>; 3021defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor, SchedWriteVecLogic.XMM, 1>; 3022defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn, SchedWriteVecLogic.XMM, 0>; 3023defm KADD : avx512_mask_binop_all<0x4A, "kadd", X86kadd, SchedWriteVecLogic.XMM, 1, HasDQI>; 3024 3025multiclass avx512_binop_pat<SDPatternOperator VOpNode, SDPatternOperator OpNode, 3026 Instruction Inst> { 3027 // With AVX512F, an 8-bit mask is promoted to a 16-bit mask; with the DQI 3028 // set the type is legal and the KxxxB instructions are used directly. 3029 let Predicates = [NoDQI] in 3030 def : Pat<(VOpNode VK8:$src1, VK8:$src2), 3031 (COPY_TO_REGCLASS 3032 (Inst (COPY_TO_REGCLASS VK8:$src1, VK16), 3033 (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>; 3034 3035 // All types smaller than 8 bits require conversion anyway. 3036 def : Pat<(OpNode VK1:$src1, VK1:$src2), 3037 (COPY_TO_REGCLASS (Inst 3038 (COPY_TO_REGCLASS VK1:$src1, VK16), 3039 (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>; 3040 def : Pat<(VOpNode VK2:$src1, VK2:$src2), 3041 (COPY_TO_REGCLASS (Inst 3042 (COPY_TO_REGCLASS VK2:$src1, VK16), 3043 (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>; 3044 def : Pat<(VOpNode VK4:$src1, VK4:$src2), 3045 (COPY_TO_REGCLASS (Inst 3046 (COPY_TO_REGCLASS VK4:$src1, VK16), 3047 (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>; 3048} 3049 3050defm : avx512_binop_pat<and, and, KANDWrr>; 3051defm : avx512_binop_pat<vandn, andn, KANDNWrr>; 3052defm : avx512_binop_pat<or, or, KORWrr>; 3053defm : avx512_binop_pat<vxnor, xnor, KXNORWrr>; 3054defm : avx512_binop_pat<xor, xor, KXORWrr>; 3055 3056// Mask unpacking 3057multiclass avx512_mask_unpck<string Suffix, X86KVectorVTInfo Dst, 3058 X86KVectorVTInfo Src, X86FoldableSchedWrite sched, 3059 Predicate prd> { 3060 let Predicates = [prd] in { 3061 let hasSideEffects = 0 in 3062 def rr : I<0x4b, MRMSrcReg, (outs Dst.KRC:$dst), 3063 (ins Src.KRC:$src1, Src.KRC:$src2), 3064 "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, 3065 VEX_4V, VEX_L, Sched<[sched]>; 3066 3067 def : Pat<(Dst.KVT (concat_vectors Src.KRC:$src1, Src.KRC:$src2)), 3068 (!cast<Instruction>(NAME##rr) Src.KRC:$src2, Src.KRC:$src1)>; 3069 } 3070} 3071 3072defm KUNPCKBW : avx512_mask_unpck<"bw", v16i1_info, v8i1_info, WriteShuffle, HasAVX512>, PD; 3073defm KUNPCKWD : avx512_mask_unpck<"wd", v32i1_info, v16i1_info, WriteShuffle, HasBWI>, PS; 3074defm KUNPCKDQ : avx512_mask_unpck<"dq", v64i1_info, v32i1_info, WriteShuffle, HasBWI>, PS, VEX_W; 3075 3076// Mask bit testing 3077multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC, 3078 SDNode OpNode, X86FoldableSchedWrite sched, 3079 Predicate prd> { 3080 let Predicates = [prd], Defs = [EFLAGS] in 3081 def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2), 3082 !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), 3083 [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>, 3084 Sched<[sched]>; 3085} 3086 3087multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode, 3088 X86FoldableSchedWrite sched, 3089 Predicate prdW = HasAVX512> { 3090 defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, sched, HasDQI>, 3091 VEX, PD; 3092 defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, sched, prdW>, 3093 VEX, PS; 3094 defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode,
sched, HasBWI>, 3095 VEX, PS, VEX_W; 3096 defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, sched, HasBWI>, 3097 VEX, PD, VEX_W; 3098} 3099 3100// TODO - do we need a X86SchedWriteWidths::KMASK type? 3101defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SchedWriteVecLogic.XMM>; 3102defm KTEST : avx512_mask_testop_w<0x99, "ktest", X86ktest, SchedWriteVecLogic.XMM, HasDQI>; 3103 3104// Mask shift 3105multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC, 3106 SDNode OpNode, X86FoldableSchedWrite sched> { 3107 let Predicates = [HasAVX512] in 3108 def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm), 3109 !strconcat(OpcodeStr, 3110 "\t{$imm, $src, $dst|$dst, $src, $imm}"), 3111 [(set KRC:$dst, (OpNode KRC:$src, (i8 timm:$imm)))]>, 3112 Sched<[sched]>; 3113} 3114 3115multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr, 3116 SDNode OpNode, X86FoldableSchedWrite sched> { 3117 defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode, 3118 sched>, VEX, TAPD, VEX_W; 3119 let Predicates = [HasDQI] in 3120 defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode, 3121 sched>, VEX, TAPD; 3122 let Predicates = [HasBWI] in { 3123 defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode, 3124 sched>, VEX, TAPD, VEX_W; 3125 defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode, 3126 sched>, VEX, TAPD; 3127 } 3128} 3129 3130defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, WriteShuffle>; 3131defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, WriteShuffle>; 3132 3133// Patterns for comparing 128/256-bit integer vectors using 512-bit instruction. 3134multiclass axv512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su, 3135 string InstStr, 3136 X86VectorVTInfo Narrow, 3137 X86VectorVTInfo Wide> { 3138def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1), 3139 (Narrow.VT Narrow.RC:$src2), cond)), 3140 (COPY_TO_REGCLASS 3141 (!cast<Instruction>(InstStr#"Zrri") 3142 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), 3143 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)), 3144 (Frag.OperandTransform $cc)), Narrow.KRC)>; 3145 3146def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, 3147 (Narrow.KVT (Frag_su:$cc (Narrow.VT Narrow.RC:$src1), 3148 (Narrow.VT Narrow.RC:$src2), 3149 cond)))), 3150 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik") 3151 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), 3152 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), 3153 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)), 3154 (Frag_su.OperandTransform $cc)), Narrow.KRC)>; 3155} 3156 3157multiclass axv512_icmp_packed_cc_rmb_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su, 3158 PatFrag CommFrag, PatFrag CommFrag_su, 3159 string InstStr, 3160 X86VectorVTInfo Narrow, 3161 X86VectorVTInfo Wide> { 3162// Broadcast load. 
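// e.g. a v8i32 compare against a splatted scalar load is selected as the
// 512-bit VPCMPDZrmib on the zmm-widened register operand, and the
// k-register result is copied back down to VK8.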
3163def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1), 3164 (Narrow.BroadcastLdFrag addr:$src2), cond)), 3165 (COPY_TO_REGCLASS 3166 (!cast<Instruction>(InstStr#"Zrmib") 3167 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), 3168 addr:$src2, (Frag.OperandTransform $cc)), Narrow.KRC)>; 3169 3170def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, 3171 (Narrow.KVT 3172 (Frag_su:$cc (Narrow.VT Narrow.RC:$src1), 3173 (Narrow.BroadcastLdFrag addr:$src2), 3174 cond)))), 3175 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk") 3176 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), 3177 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), 3178 addr:$src2, (Frag_su.OperandTransform $cc)), Narrow.KRC)>; 3179 3180// Commuted with broadcast load. 3181def : Pat<(Narrow.KVT (CommFrag:$cc (Narrow.BroadcastLdFrag addr:$src2), 3182 (Narrow.VT Narrow.RC:$src1), 3183 cond)), 3184 (COPY_TO_REGCLASS 3185 (!cast<Instruction>(InstStr#"Zrmib") 3186 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), 3187 addr:$src2, (CommFrag.OperandTransform $cc)), Narrow.KRC)>; 3188 3189def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, 3190 (Narrow.KVT 3191 (CommFrag_su:$cc (Narrow.BroadcastLdFrag addr:$src2), 3192 (Narrow.VT Narrow.RC:$src1), 3193 cond)))), 3194 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk") 3195 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), 3196 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), 3197 addr:$src2, (CommFrag_su.OperandTransform $cc)), Narrow.KRC)>; 3198} 3199 3200// Same as above, but for fp types which don't use PatFrags. 3201multiclass axv512_cmp_packed_cc_no_vlx_lowering<string InstStr, 3202 X86VectorVTInfo Narrow, 3203 X86VectorVTInfo Wide> { 3204def : Pat<(Narrow.KVT (X86any_cmpm (Narrow.VT Narrow.RC:$src1), 3205 (Narrow.VT Narrow.RC:$src2), timm:$cc)), 3206 (COPY_TO_REGCLASS 3207 (!cast<Instruction>(InstStr#"Zrri") 3208 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), 3209 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)), 3210 timm:$cc), Narrow.KRC)>; 3211 3212def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, 3213 (X86cmpm_su (Narrow.VT Narrow.RC:$src1), 3214 (Narrow.VT Narrow.RC:$src2), timm:$cc))), 3215 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik") 3216 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), 3217 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), 3218 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)), 3219 timm:$cc), Narrow.KRC)>; 3220 3221// Broadcast load. 3222def : Pat<(Narrow.KVT (X86any_cmpm (Narrow.VT Narrow.RC:$src1), 3223 (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc)), 3224 (COPY_TO_REGCLASS 3225 (!cast<Instruction>(InstStr#"Zrmbi") 3226 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), 3227 addr:$src2, timm:$cc), Narrow.KRC)>; 3228 3229def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, 3230 (X86cmpm_su (Narrow.VT Narrow.RC:$src1), 3231 (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc))), 3232 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik") 3233 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), 3234 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), 3235 addr:$src2, timm:$cc), Narrow.KRC)>; 3236 3237// Commuted with broadcast load. 
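// Same as the broadcast patterns above, but with the broadcast as the first
// operand; CommFrag/CommFrag_su already encode the swapped condition, so
// OperandTransform still yields the correct immediate.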
3238def : Pat<(Narrow.KVT (X86any_cmpm (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), 3239 (Narrow.VT Narrow.RC:$src1), timm:$cc)), 3240 (COPY_TO_REGCLASS 3241 (!cast<Instruction>(InstStr#"Zrmbi") 3242 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), 3243 addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>; 3244 3245def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, 3246 (X86cmpm_su (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), 3247 (Narrow.VT Narrow.RC:$src1), timm:$cc))), 3248 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik") 3249 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), 3250 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), 3251 addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>; 3252} 3253 3254let Predicates = [HasAVX512, NoVLX] in { 3255 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>; 3256 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>; 3257 3258 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>; 3259 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>; 3260 3261 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>; 3262 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>; 3263 3264 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>; 3265 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>; 3266 3267 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPD", v8i32x_info, v16i32_info>; 3268 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUD", v8i32x_info, v16i32_info>; 3269 3270 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPD", v4i32x_info, v16i32_info>; 3271 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUD", v4i32x_info, v16i32_info>; 3272 3273 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPQ", v4i64x_info, v8i64_info>; 3274 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUQ", v4i64x_info, v8i64_info>; 3275 3276 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPQ", v2i64x_info, v8i64_info>; 3277 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUQ", v2i64x_info, v8i64_info>; 3278 3279 defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v8f32x_info, v16f32_info>; 3280 defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v4f32x_info, v16f32_info>; 3281 defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v4f64x_info, v8f64_info>; 3282 defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v2f64x_info, v8f64_info>; 3283} 3284 3285let Predicates = [HasBWI, NoVLX] in { 3286 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v32i8x_info, v64i8_info>; 3287 defm : 
axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v32i8x_info, v64i8_info>; 3288 3289 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v16i8x_info, v64i8_info>; 3290 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v16i8x_info, v64i8_info>; 3291 3292 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v16i16x_info, v32i16_info>; 3293 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v16i16x_info, v32i16_info>; 3294 3295 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v8i16x_info, v32i16_info>; 3296 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v8i16x_info, v32i16_info>; 3297} 3298 3299// Mask setting all 0s or 1s 3300multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> { 3301 let Predicates = [HasAVX512] in 3302 let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1, 3303 SchedRW = [WriteZero] in 3304 def #NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "", 3305 [(set KRC:$dst, (VT Val))]>; 3306} 3307 3308multiclass avx512_mask_setop_w<PatFrag Val> { 3309 defm W : avx512_mask_setop<VK16, v16i1, Val>; 3310 defm D : avx512_mask_setop<VK32, v32i1, Val>; 3311 defm Q : avx512_mask_setop<VK64, v64i1, Val>; 3312} 3313 3314defm KSET0 : avx512_mask_setop_w<immAllZerosV>; 3315defm KSET1 : avx512_mask_setop_w<immAllOnesV>; 3316 3317// With AVX-512 only, 8-bit mask is promoted to 16-bit mask. 3318let Predicates = [HasAVX512] in { 3319 def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>; 3320 def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>; 3321 def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>; 3322 def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>; 3323 def : Pat<(v8i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK8)>; 3324 def : Pat<(v4i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK4)>; 3325 def : Pat<(v2i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK2)>; 3326 def : Pat<(v1i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK1)>; 3327} 3328 3329// Patterns for kmask insert_subvector/extract_subvector to/from index=0 3330multiclass operation_subvector_mask_lowering<RegisterClass subRC, ValueType subVT, 3331 RegisterClass RC, ValueType VT> { 3332 def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))), 3333 (subVT (COPY_TO_REGCLASS RC:$src, subRC))>; 3334 3335 def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))), 3336 (VT (COPY_TO_REGCLASS subRC:$src, RC))>; 3337} 3338defm : operation_subvector_mask_lowering<VK1, v1i1, VK2, v2i1>; 3339defm : operation_subvector_mask_lowering<VK1, v1i1, VK4, v4i1>; 3340defm : operation_subvector_mask_lowering<VK1, v1i1, VK8, v8i1>; 3341defm : operation_subvector_mask_lowering<VK1, v1i1, VK16, v16i1>; 3342defm : operation_subvector_mask_lowering<VK1, v1i1, VK32, v32i1>; 3343defm : operation_subvector_mask_lowering<VK1, v1i1, VK64, v64i1>; 3344 3345defm : operation_subvector_mask_lowering<VK2, v2i1, VK4, v4i1>; 3346defm : operation_subvector_mask_lowering<VK2, v2i1, VK8, v8i1>; 3347defm : operation_subvector_mask_lowering<VK2, v2i1, VK16, v16i1>; 3348defm : operation_subvector_mask_lowering<VK2, v2i1, VK32, v32i1>; 3349defm : operation_subvector_mask_lowering<VK2, v2i1, VK64, v64i1>; 3350 3351defm : operation_subvector_mask_lowering<VK4, v4i1, VK8, v8i1>; 3352defm : operation_subvector_mask_lowering<VK4, v4i1, VK16, v16i1>; 3353defm : operation_subvector_mask_lowering<VK4, 
v4i1, VK32, v32i1>; 3354defm : operation_subvector_mask_lowering<VK4, v4i1, VK64, v64i1>; 3355 3356defm : operation_subvector_mask_lowering<VK8, v8i1, VK16, v16i1>; 3357defm : operation_subvector_mask_lowering<VK8, v8i1, VK32, v32i1>; 3358defm : operation_subvector_mask_lowering<VK8, v8i1, VK64, v64i1>; 3359 3360defm : operation_subvector_mask_lowering<VK16, v16i1, VK32, v32i1>; 3361defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>; 3362 3363defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>; 3364 3365//===----------------------------------------------------------------------===// 3366// AVX-512 - Aligned and unaligned load and store 3367// 3368 3369multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name, 3370 X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload, 3371 X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd, 3372 bit NoRMPattern = 0, 3373 SDPatternOperator SelectOprr = vselect> { 3374 let hasSideEffects = 0 in { 3375 let isMoveReg = 1 in 3376 def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src), 3377 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [], 3378 _.ExeDomain>, EVEX, Sched<[Sched.RR]>, 3379 EVEX2VEXOverride<EVEX2VEXOvrd#"rr">; 3380 def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), 3381 (ins _.KRCWM:$mask, _.RC:$src), 3382 !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|", 3383 "${dst} {${mask}} {z}, $src}"), 3384 [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask, 3385 (_.VT _.RC:$src), 3386 _.ImmAllZerosV)))], _.ExeDomain>, 3387 EVEX, EVEX_KZ, Sched<[Sched.RR]>; 3388 3389 let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1 in 3390 def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src), 3391 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 3392 !if(NoRMPattern, [], 3393 [(set _.RC:$dst, 3394 (_.VT (ld_frag addr:$src)))]), 3395 _.ExeDomain>, EVEX, Sched<[Sched.RM]>, 3396 EVEX2VEXOverride<EVEX2VEXOvrd#"rm">; 3397 3398 let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in { 3399 def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), 3400 (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1), 3401 !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|", 3402 "${dst} {${mask}}, $src1}"), 3403 [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask, 3404 (_.VT _.RC:$src1), 3405 (_.VT _.RC:$src0))))], _.ExeDomain>, 3406 EVEX, EVEX_K, Sched<[Sched.RR]>; 3407 def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), 3408 (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1), 3409 !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|", 3410 "${dst} {${mask}}, $src1}"), 3411 [(set _.RC:$dst, (_.VT 3412 (vselect _.KRCWM:$mask, 3413 (_.VT (ld_frag addr:$src1)), 3414 (_.VT _.RC:$src0))))], _.ExeDomain>, 3415 EVEX, EVEX_K, Sched<[Sched.RM]>; 3416 } 3417 def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), 3418 (ins _.KRCWM:$mask, _.MemOp:$src), 3419 OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"# 3420 "${dst} {${mask}} {z}, $src}", 3421 [(set _.RC:$dst, (_.VT (vselect _.KRCWM:$mask, 3422 (_.VT (ld_frag addr:$src)), _.ImmAllZerosV)))], 3423 _.ExeDomain>, EVEX, EVEX_KZ, Sched<[Sched.RM]>; 3424 } 3425 def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)), 3426 (!cast<Instruction>(Name#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>; 3427 3428 def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)), 3429 (!cast<Instruction>(Name#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>; 3430 3431 def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))), 3432 (!cast<Instruction>(Name#_.ZSuffix##rmk) _.RC:$src0, 3433 
_.KRCWM:$mask, addr:$ptr)>; 3434} 3435 3436multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr, 3437 AVX512VLVectorVTInfo _, Predicate prd, 3438 X86SchedWriteMoveLSWidths Sched, 3439 string EVEX2VEXOvrd, bit NoRMPattern = 0> { 3440 let Predicates = [prd] in 3441 defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512, 3442 _.info512.AlignedLdFrag, masked_load_aligned, 3443 Sched.ZMM, "", NoRMPattern>, EVEX_V512; 3444 3445 let Predicates = [prd, HasVLX] in { 3446 defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256, 3447 _.info256.AlignedLdFrag, masked_load_aligned, 3448 Sched.YMM, EVEX2VEXOvrd#"Y", NoRMPattern>, EVEX_V256; 3449 defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128, 3450 _.info128.AlignedLdFrag, masked_load_aligned, 3451 Sched.XMM, EVEX2VEXOvrd, NoRMPattern>, EVEX_V128; 3452 } 3453} 3454 3455multiclass avx512_load_vl<bits<8> opc, string OpcodeStr, 3456 AVX512VLVectorVTInfo _, Predicate prd, 3457 X86SchedWriteMoveLSWidths Sched, 3458 string EVEX2VEXOvrd, bit NoRMPattern = 0, 3459 SDPatternOperator SelectOprr = vselect> { 3460 let Predicates = [prd] in 3461 defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512, _.info512.LdFrag, 3462 masked_load, Sched.ZMM, "", 3463 NoRMPattern, SelectOprr>, EVEX_V512; 3464 3465 let Predicates = [prd, HasVLX] in { 3466 defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256, _.info256.LdFrag, 3467 masked_load, Sched.YMM, EVEX2VEXOvrd#"Y", 3468 NoRMPattern, SelectOprr>, EVEX_V256; 3469 defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128, _.info128.LdFrag, 3470 masked_load, Sched.XMM, EVEX2VEXOvrd, 3471 NoRMPattern, SelectOprr>, EVEX_V128; 3472 } 3473} 3474 3475multiclass avx512_store<bits<8> opc, string OpcodeStr, string BaseName, 3476 X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore, 3477 X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd, 3478 bit NoMRPattern = 0> { 3479 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in { 3480 let isMoveReg = 1 in 3481 def rr_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src), 3482 OpcodeStr # "\t{$src, $dst|$dst, $src}", 3483 [], _.ExeDomain>, EVEX, 3484 FoldGenData<BaseName#_.ZSuffix#rr>, Sched<[Sched.RR]>, 3485 EVEX2VEXOverride<EVEX2VEXOvrd#"rr_REV">; 3486 def rrk_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), 3487 (ins _.KRCWM:$mask, _.RC:$src), 3488 OpcodeStr # "\t{$src, ${dst} {${mask}}|"# 3489 "${dst} {${mask}}, $src}", 3490 [], _.ExeDomain>, EVEX, EVEX_K, 3491 FoldGenData<BaseName#_.ZSuffix#rrk>, 3492 Sched<[Sched.RR]>; 3493 def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), 3494 (ins _.KRCWM:$mask, _.RC:$src), 3495 OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" # 3496 "${dst} {${mask}} {z}, $src}", 3497 [], _.ExeDomain>, EVEX, EVEX_KZ, 3498 FoldGenData<BaseName#_.ZSuffix#rrkz>, 3499 Sched<[Sched.RR]>; 3500 } 3501 3502 let hasSideEffects = 0, mayStore = 1 in 3503 def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src), 3504 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 3505 !if(NoMRPattern, [], 3506 [(st_frag (_.VT _.RC:$src), addr:$dst)]), 3507 _.ExeDomain>, EVEX, Sched<[Sched.MR]>, 3508 EVEX2VEXOverride<EVEX2VEXOvrd#"mr">; 3509 def mrk : AVX512PI<opc, MRMDestMem, (outs), 3510 (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src), 3511 OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}", 3512 [], _.ExeDomain>, EVEX, EVEX_K, Sched<[Sched.MR]>, 3513 NotMemoryFoldable; 3514 3515 def: Pat<(mstore (_.VT _.RC:$src), addr:$ptr, _.KRCWM:$mask), 3516 (!cast<Instruction>(BaseName#_.ZSuffix#mrk) addr:$ptr, 3517 _.KRCWM:$mask, 
_.RC:$src)>; 3518 3519 def : InstAlias<OpcodeStr#".s\t{$src, $dst|$dst, $src}", 3520 (!cast<Instruction>(BaseName#_.ZSuffix#"rr_REV") 3521 _.RC:$dst, _.RC:$src), 0>; 3522 def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}", 3523 (!cast<Instruction>(BaseName#_.ZSuffix#"rrk_REV") 3524 _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>; 3525 def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}", 3526 (!cast<Instruction>(BaseName#_.ZSuffix#"rrkz_REV") 3527 _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>; 3528} 3529 3530multiclass avx512_store_vl< bits<8> opc, string OpcodeStr, 3531 AVX512VLVectorVTInfo _, Predicate prd, 3532 X86SchedWriteMoveLSWidths Sched, 3533 string EVEX2VEXOvrd, bit NoMRPattern = 0> { 3534 let Predicates = [prd] in 3535 defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, store, 3536 masked_store, Sched.ZMM, "", 3537 NoMRPattern>, EVEX_V512; 3538 let Predicates = [prd, HasVLX] in { 3539 defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, store, 3540 masked_store, Sched.YMM, 3541 EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256; 3542 defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, store, 3543 masked_store, Sched.XMM, EVEX2VEXOvrd, 3544 NoMRPattern>, EVEX_V128; 3545 } 3546} 3547 3548multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr, 3549 AVX512VLVectorVTInfo _, Predicate prd, 3550 X86SchedWriteMoveLSWidths Sched, 3551 string EVEX2VEXOvrd, bit NoMRPattern = 0> { 3552 let Predicates = [prd] in 3553 defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, alignedstore, 3554 masked_store_aligned, Sched.ZMM, "", 3555 NoMRPattern>, EVEX_V512; 3556 3557 let Predicates = [prd, HasVLX] in { 3558 defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, alignedstore, 3559 masked_store_aligned, Sched.YMM, 3560 EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256; 3561 defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, alignedstore, 3562 masked_store_aligned, Sched.XMM, EVEX2VEXOvrd, 3563 NoMRPattern>, EVEX_V128; 3564 } 3565} 3566 3567defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info, 3568 HasAVX512, SchedWriteFMoveLS, "VMOVAPS">, 3569 avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info, 3570 HasAVX512, SchedWriteFMoveLS, "VMOVAPS">, 3571 PS, EVEX_CD8<32, CD8VF>; 3572 3573defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info, 3574 HasAVX512, SchedWriteFMoveLS, "VMOVAPD">, 3575 avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info, 3576 HasAVX512, SchedWriteFMoveLS, "VMOVAPD">, 3577 PD, VEX_W, EVEX_CD8<64, CD8VF>; 3578 3579defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512, 3580 SchedWriteFMoveLS, "VMOVUPS", 0, null_frag>, 3581 avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512, 3582 SchedWriteFMoveLS, "VMOVUPS">, 3583 PS, EVEX_CD8<32, CD8VF>; 3584 3585defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512, 3586 SchedWriteFMoveLS, "VMOVUPD", 0, null_frag>, 3587 avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512, 3588 SchedWriteFMoveLS, "VMOVUPD">, 3589 PD, VEX_W, EVEX_CD8<64, CD8VF>; 3590 3591defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info, 3592 HasAVX512, SchedWriteVecMoveLS, 3593 "VMOVDQA", 1>, 3594 avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info, 3595 HasAVX512, SchedWriteVecMoveLS, 3596 "VMOVDQA", 1>, 3597 PD, EVEX_CD8<32, CD8VF>; 3598 3599defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info, 3600 HasAVX512, SchedWriteVecMoveLS, 3601 
"VMOVDQA">, 3602 avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info, 3603 HasAVX512, SchedWriteVecMoveLS, 3604 "VMOVDQA">, 3605 PD, VEX_W, EVEX_CD8<64, CD8VF>; 3606 3607defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI, 3608 SchedWriteVecMoveLS, "VMOVDQU", 1>, 3609 avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, HasBWI, 3610 SchedWriteVecMoveLS, "VMOVDQU", 1>, 3611 XD, EVEX_CD8<8, CD8VF>; 3612 3613defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI, 3614 SchedWriteVecMoveLS, "VMOVDQU", 1>, 3615 avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, HasBWI, 3616 SchedWriteVecMoveLS, "VMOVDQU", 1>, 3617 XD, VEX_W, EVEX_CD8<16, CD8VF>; 3618 3619defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512, 3620 SchedWriteVecMoveLS, "VMOVDQU", 1, null_frag>, 3621 avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, HasAVX512, 3622 SchedWriteVecMoveLS, "VMOVDQU", 1>, 3623 XS, EVEX_CD8<32, CD8VF>; 3624 3625defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512, 3626 SchedWriteVecMoveLS, "VMOVDQU", 0, null_frag>, 3627 avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, HasAVX512, 3628 SchedWriteVecMoveLS, "VMOVDQU">, 3629 XS, VEX_W, EVEX_CD8<64, CD8VF>; 3630 3631// Special instructions to help with spilling when we don't have VLX. We need 3632// to load or store from a ZMM register instead. These are converted in 3633// expandPostRAPseudos. 3634let isReMaterializable = 1, canFoldAsLoad = 1, 3635 isPseudo = 1, mayLoad = 1, hasSideEffects = 0 in { 3636def VMOVAPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src), 3637 "", []>, Sched<[WriteFLoadX]>; 3638def VMOVAPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src), 3639 "", []>, Sched<[WriteFLoadY]>; 3640def VMOVUPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src), 3641 "", []>, Sched<[WriteFLoadX]>; 3642def VMOVUPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src), 3643 "", []>, Sched<[WriteFLoadY]>; 3644} 3645 3646let isPseudo = 1, mayStore = 1, hasSideEffects = 0 in { 3647def VMOVAPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src), 3648 "", []>, Sched<[WriteFStoreX]>; 3649def VMOVAPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src), 3650 "", []>, Sched<[WriteFStoreY]>; 3651def VMOVUPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src), 3652 "", []>, Sched<[WriteFStoreX]>; 3653def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src), 3654 "", []>, Sched<[WriteFStoreY]>; 3655} 3656 3657def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 immAllZerosV), 3658 (v8i64 VR512:$src))), 3659 (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)), 3660 VK8), VR512:$src)>; 3661 3662def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV), 3663 (v16i32 VR512:$src))), 3664 (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>; 3665 3666// These patterns exist to prevent the above patterns from introducing a second 3667// mask inversion when one already exists. 
3668def : Pat<(v8i64 (vselect (xor VK8:$mask, (v8i1 immAllOnesV)), 3669 (v8i64 immAllZerosV), 3670 (v8i64 VR512:$src))), 3671 (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>; 3672def : Pat<(v16i32 (vselect (xor VK16:$mask, (v16i1 immAllOnesV)), 3673 (v16i32 immAllZerosV), 3674 (v16i32 VR512:$src))), 3675 (VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>; 3676 3677multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow, 3678 X86VectorVTInfo Wide> { 3679 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask), 3680 Narrow.RC:$src1, Narrow.RC:$src0)), 3681 (EXTRACT_SUBREG 3682 (Wide.VT 3683 (!cast<Instruction>(InstrStr#"rrk") 3684 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src0, Narrow.SubRegIdx)), 3685 (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM), 3686 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))), 3687 Narrow.SubRegIdx)>; 3688 3689 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask), 3690 Narrow.RC:$src1, Narrow.ImmAllZerosV)), 3691 (EXTRACT_SUBREG 3692 (Wide.VT 3693 (!cast<Instruction>(InstrStr#"rrkz") 3694 (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM), 3695 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))), 3696 Narrow.SubRegIdx)>; 3697} 3698 3699// Patterns for handling v8i1 selects of 256-bit vectors when VLX isn't 3700// available. Use a 512-bit operation and extract. 3701let Predicates = [HasAVX512, NoVLX] in { 3702 defm : mask_move_lowering<"VMOVAPSZ", v4f32x_info, v16f32_info>; 3703 defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>; 3704 defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>; 3705 defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>; 3706 3707 defm : mask_move_lowering<"VMOVAPDZ", v2f64x_info, v8f64_info>; 3708 defm : mask_move_lowering<"VMOVDQA64Z", v2i64x_info, v8i64_info>; 3709 defm : mask_move_lowering<"VMOVAPDZ", v4f64x_info, v8f64_info>; 3710 defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>; 3711} 3712 3713let Predicates = [HasBWI, NoVLX] in { 3714 defm : mask_move_lowering<"VMOVDQU8Z", v16i8x_info, v64i8_info>; 3715 defm : mask_move_lowering<"VMOVDQU8Z", v32i8x_info, v64i8_info>; 3716 3717 defm : mask_move_lowering<"VMOVDQU16Z", v8i16x_info, v32i16_info>; 3718 defm : mask_move_lowering<"VMOVDQU16Z", v16i16x_info, v32i16_info>; 3719} 3720 3721let Predicates = [HasAVX512] in { 3722 // 512-bit load. 3723 def : Pat<(alignedloadv16i32 addr:$src), 3724 (VMOVDQA64Zrm addr:$src)>; 3725 def : Pat<(alignedloadv32i16 addr:$src), 3726 (VMOVDQA64Zrm addr:$src)>; 3727 def : Pat<(alignedloadv64i8 addr:$src), 3728 (VMOVDQA64Zrm addr:$src)>; 3729 def : Pat<(loadv16i32 addr:$src), 3730 (VMOVDQU64Zrm addr:$src)>; 3731 def : Pat<(loadv32i16 addr:$src), 3732 (VMOVDQU64Zrm addr:$src)>; 3733 def : Pat<(loadv64i8 addr:$src), 3734 (VMOVDQU64Zrm addr:$src)>; 3735 3736 // 512-bit store. 
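// As with the loads above, unmasked integer moves only care about alignment,
// not element width, so all element types funnel through the vmovdqa64 /
// vmovdqu64 forms.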
3737 def : Pat<(alignedstore (v16i32 VR512:$src), addr:$dst), 3738 (VMOVDQA64Zmr addr:$dst, VR512:$src)>; 3739 def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst), 3740 (VMOVDQA64Zmr addr:$dst, VR512:$src)>; 3741 def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst), 3742 (VMOVDQA64Zmr addr:$dst, VR512:$src)>; 3743 def : Pat<(store (v16i32 VR512:$src), addr:$dst), 3744 (VMOVDQU64Zmr addr:$dst, VR512:$src)>; 3745 def : Pat<(store (v32i16 VR512:$src), addr:$dst), 3746 (VMOVDQU64Zmr addr:$dst, VR512:$src)>; 3747 def : Pat<(store (v64i8 VR512:$src), addr:$dst), 3748 (VMOVDQU64Zmr addr:$dst, VR512:$src)>; 3749} 3750 3751let Predicates = [HasVLX] in { 3752 // 128-bit load. 3753 def : Pat<(alignedloadv4i32 addr:$src), 3754 (VMOVDQA64Z128rm addr:$src)>; 3755 def : Pat<(alignedloadv8i16 addr:$src), 3756 (VMOVDQA64Z128rm addr:$src)>; 3757 def : Pat<(alignedloadv16i8 addr:$src), 3758 (VMOVDQA64Z128rm addr:$src)>; 3759 def : Pat<(loadv4i32 addr:$src), 3760 (VMOVDQU64Z128rm addr:$src)>; 3761 def : Pat<(loadv8i16 addr:$src), 3762 (VMOVDQU64Z128rm addr:$src)>; 3763 def : Pat<(loadv16i8 addr:$src), 3764 (VMOVDQU64Z128rm addr:$src)>; 3765 3766 // 128-bit store. 3767 def : Pat<(alignedstore (v4i32 VR128X:$src), addr:$dst), 3768 (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>; 3769 def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst), 3770 (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>; 3771 def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst), 3772 (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>; 3773 def : Pat<(store (v4i32 VR128X:$src), addr:$dst), 3774 (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>; 3775 def : Pat<(store (v8i16 VR128X:$src), addr:$dst), 3776 (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>; 3777 def : Pat<(store (v16i8 VR128X:$src), addr:$dst), 3778 (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>; 3779 3780 // 256-bit load. 3781 def : Pat<(alignedloadv8i32 addr:$src), 3782 (VMOVDQA64Z256rm addr:$src)>; 3783 def : Pat<(alignedloadv16i16 addr:$src), 3784 (VMOVDQA64Z256rm addr:$src)>; 3785 def : Pat<(alignedloadv32i8 addr:$src), 3786 (VMOVDQA64Z256rm addr:$src)>; 3787 def : Pat<(loadv8i32 addr:$src), 3788 (VMOVDQU64Z256rm addr:$src)>; 3789 def : Pat<(loadv16i16 addr:$src), 3790 (VMOVDQU64Z256rm addr:$src)>; 3791 def : Pat<(loadv32i8 addr:$src), 3792 (VMOVDQU64Z256rm addr:$src)>; 3793 3794 // 256-bit store. 
3795 def : Pat<(alignedstore (v8i32 VR256X:$src), addr:$dst), 3796 (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>; 3797 def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst), 3798 (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>; 3799 def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst), 3800 (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>; 3801 def : Pat<(store (v8i32 VR256X:$src), addr:$dst), 3802 (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>; 3803 def : Pat<(store (v16i16 VR256X:$src), addr:$dst), 3804 (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>; 3805 def : Pat<(store (v32i8 VR256X:$src), addr:$dst), 3806 (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>; 3807} 3808 3809// Move Int Doubleword to Packed Double Int 3810// 3811let ExeDomain = SSEPackedInt in { 3812def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src), 3813 "vmovd\t{$src, $dst|$dst, $src}", 3814 [(set VR128X:$dst, 3815 (v4i32 (scalar_to_vector GR32:$src)))]>, 3816 EVEX, Sched<[WriteVecMoveFromGpr]>; 3817def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src), 3818 "vmovd\t{$src, $dst|$dst, $src}", 3819 [(set VR128X:$dst, 3820 (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>, 3821 EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>; 3822def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src), 3823 "vmovq\t{$src, $dst|$dst, $src}", 3824 [(set VR128X:$dst, 3825 (v2i64 (scalar_to_vector GR64:$src)))]>, 3826 EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>; 3827let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in 3828def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), 3829 (ins i64mem:$src), 3830 "vmovq\t{$src, $dst|$dst, $src}", []>, 3831 EVEX, VEX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecLoad]>; 3832let isCodeGenOnly = 1 in { 3833def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src), 3834 "vmovq\t{$src, $dst|$dst, $src}", 3835 [(set FR64X:$dst, (bitconvert GR64:$src))]>, 3836 EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>; 3837def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src), 3838 "vmovq\t{$src, $dst|$dst, $src}", 3839 [(set GR64:$dst, (bitconvert FR64X:$src))]>, 3840 EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>; 3841} 3842} // ExeDomain = SSEPackedInt 3843 3844// Move Int Doubleword to Single Scalar 3845// 3846let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in { 3847def VMOVDI2SSZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src), 3848 "vmovd\t{$src, $dst|$dst, $src}", 3849 [(set FR32X:$dst, (bitconvert GR32:$src))]>, 3850 EVEX, Sched<[WriteVecMoveFromGpr]>; 3851} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1 3852 3853// Move doubleword from xmm register to r/m32 3854// 3855let ExeDomain = SSEPackedInt in { 3856def VMOVPDI2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src), 3857 "vmovd\t{$src, $dst|$dst, $src}", 3858 [(set GR32:$dst, (extractelt (v4i32 VR128X:$src), 3859 (iPTR 0)))]>, 3860 EVEX, Sched<[WriteVecMoveToGpr]>; 3861def VMOVPDI2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs), 3862 (ins i32mem:$dst, VR128X:$src), 3863 "vmovd\t{$src, $dst|$dst, $src}", 3864 [(store (i32 (extractelt (v4i32 VR128X:$src), 3865 (iPTR 0))), addr:$dst)]>, 3866 EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>; 3867} // ExeDomain = SSEPackedInt 3868 3869// Move quadword from xmm1 register to r/m64 3870// 3871let ExeDomain = SSEPackedInt in { 3872def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src), 3873 "vmovq\t{$src, $dst|$dst, $src}", 3874 [(set 
GR64:$dst, (extractelt (v2i64 VR128X:$src), 3875 (iPTR 0)))]>, 3876 PD, EVEX, VEX_W, Sched<[WriteVecMoveToGpr]>, 3877 Requires<[HasAVX512]>; 3878 3879let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in 3880def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src), 3881 "vmovq\t{$src, $dst|$dst, $src}", []>, PD, 3882 EVEX, VEX_W, Sched<[WriteVecStore]>, 3883 Requires<[HasAVX512, In64BitMode]>; 3884 3885def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs), 3886 (ins i64mem:$dst, VR128X:$src), 3887 "vmovq\t{$src, $dst|$dst, $src}", 3888 [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)), 3889 addr:$dst)]>, 3890 EVEX, PD, VEX_W, EVEX_CD8<64, CD8VT1>, 3891 Sched<[WriteVecStore]>, Requires<[HasAVX512]>; 3892 3893let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in 3894def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst), 3895 (ins VR128X:$src), 3896 "vmovq\t{$src, $dst|$dst, $src}", []>, 3897 EVEX, VEX_W, Sched<[SchedWriteVecLogic.XMM]>; 3898} // ExeDomain = SSEPackedInt 3899 3900def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}", 3901 (VMOVPQI2QIZrr VR128X:$dst, VR128X:$src), 0>; 3902 3903let Predicates = [HasAVX512] in { 3904 def : Pat<(X86vextractstore64 (v2i64 VR128X:$src), addr:$dst), 3905 (VMOVPQI2QIZmr addr:$dst, VR128X:$src)>; 3906} 3907 3908// Move Scalar Single to Double Int 3909// 3910let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in { 3911def VMOVSS2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), 3912 (ins FR32X:$src), 3913 "vmovd\t{$src, $dst|$dst, $src}", 3914 [(set GR32:$dst, (bitconvert FR32X:$src))]>, 3915 EVEX, Sched<[WriteVecMoveToGpr]>; 3916} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1 3917 3918// Move Quadword Int to Packed Quadword Int 3919// 3920let ExeDomain = SSEPackedInt in { 3921def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst), 3922 (ins i64mem:$src), 3923 "vmovq\t{$src, $dst|$dst, $src}", 3924 [(set VR128X:$dst, 3925 (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, 3926 EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>; 3927} // ExeDomain = SSEPackedInt 3928 3929// Allow "vmovd" but print "vmovq". 3930def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}", 3931 (VMOV64toPQIZrr VR128X:$dst, GR64:$src), 0>; 3932def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}", 3933 (VMOVPQIto64Zrr GR64:$dst, VR128X:$src), 0>; 3934 3935// Conversions between masks and scalar fp. 
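// These round-trip through a GPR (e.g. f32 to v32i1 is vmovd xmm->r32
// followed by kmovd r32->k), since kmov has no direct xmm <-> k-register
// form.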
3936def : Pat<(v32i1 (bitconvert FR32X:$src)), 3937 (KMOVDkr (VMOVSS2DIZrr FR32X:$src))>; 3938def : Pat<(f32 (bitconvert VK32:$src)), 3939 (VMOVDI2SSZrr (KMOVDrk VK32:$src))>; 3940 3941def : Pat<(v64i1 (bitconvert FR64X:$src)), 3942 (KMOVQkr (VMOVSDto64Zrr FR64X:$src))>; 3943def : Pat<(f64 (bitconvert VK64:$src)), 3944 (VMOV64toSDZrr (KMOVQrk VK64:$src))>; 3945 3946//===----------------------------------------------------------------------===// 3947// AVX-512 MOVSS, MOVSD 3948//===----------------------------------------------------------------------===// 3949 3950multiclass avx512_move_scalar<string asm, SDNode OpNode, PatFrag vzload_frag, 3951 X86VectorVTInfo _> { 3952 let Predicates = [HasAVX512, OptForSize] in 3953 def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst), 3954 (ins _.RC:$src1, _.RC:$src2), 3955 !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 3956 [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2)))], 3957 _.ExeDomain>, EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>; 3958 def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst), 3959 (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2), 3960 !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|", 3961 "$dst {${mask}} {z}, $src1, $src2}"), 3962 [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask, 3963 (_.VT (OpNode _.RC:$src1, _.RC:$src2)), 3964 _.ImmAllZerosV)))], 3965 _.ExeDomain>, EVEX_4V, EVEX_KZ, Sched<[SchedWriteFShuffle.XMM]>; 3966 let Constraints = "$src0 = $dst" in 3967 def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst), 3968 (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2), 3969 !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|", 3970 "$dst {${mask}}, $src1, $src2}"), 3971 [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask, 3972 (_.VT (OpNode _.RC:$src1, _.RC:$src2)), 3973 (_.VT _.RC:$src0))))], 3974 _.ExeDomain>, EVEX_4V, EVEX_K, Sched<[SchedWriteFShuffle.XMM]>; 3975 let canFoldAsLoad = 1, isReMaterializable = 1 in { 3976 def rm : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst), (ins _.ScalarMemOp:$src), 3977 !strconcat(asm, "\t{$src, $dst|$dst, $src}"), 3978 [(set _.RC:$dst, (_.VT (vzload_frag addr:$src)))], 3979 _.ExeDomain>, EVEX, Sched<[WriteFLoad]>; 3980 // _alt version uses FR32/FR64 register class. 
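// It lets a plain scalar load (ScalarLdFrag) select vmovss/vmovsd into
// FR32X/FR64X, while the VR128X form above is reserved for the vzload
// zero-extending case.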
  let isCodeGenOnly = 1 in
  def rm_alt : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
               !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
               [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
               _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
  }
  let mayLoad = 1, hasSideEffects = 0 in {
  let Constraints = "$src0 = $dst" in
  def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
              (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src),
              !strconcat(asm, "\t{$src, $dst {${mask}}|",
                "$dst {${mask}}, $src}"),
              [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFLoad]>;
  def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
               (ins _.KRCWM:$mask, _.ScalarMemOp:$src),
               !strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
                 "$dst {${mask}} {z}, $src}"),
               [], _.ExeDomain>, EVEX, EVEX_KZ, Sched<[WriteFLoad]>;
  }
  def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src),
            !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
            [(store _.FRC:$src, addr:$dst)], _.ExeDomain>,
            EVEX, Sched<[WriteFStore]>;
  let mayStore = 1, hasSideEffects = 0 in
  def mrk: AVX512PI<0x11, MRMDestMem, (outs),
             (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.RC:$src),
             !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
             [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFStore]>,
             NotMemoryFoldable;
}

defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, X86vzload32, f32x_info>,
               VEX_LIG, XS, EVEX_CD8<32, CD8VT1>;

defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, X86vzload64, f64x_info>,
               VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>;


multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
                                       PatLeaf ZeroFP, X86VectorVTInfo _> {

def : Pat<(_.VT (OpNode _.RC:$src0,
                 (_.VT (scalar_to_vector
                        (_.EltVT (X86selects VK1WM:$mask,
                                             (_.EltVT _.FRC:$src1),
                                             (_.EltVT _.FRC:$src2))))))),
          (!cast<Instruction>(InstrStr#rrk)
                 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, _.RC)),
                 VK1WM:$mask,
                 (_.VT _.RC:$src0),
                 (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;

def : Pat<(_.VT (OpNode _.RC:$src0,
                 (_.VT (scalar_to_vector
                        (_.EltVT (X86selects VK1WM:$mask,
                                             (_.EltVT _.FRC:$src1),
                                             (_.EltVT ZeroFP))))))),
          (!cast<Instruction>(InstrStr#rrkz)
                 VK1WM:$mask,
                 (_.VT _.RC:$src0),
                 (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
}

multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
                                        dag Mask, RegisterClass MaskRC> {

def : Pat<(masked_store
             (_.info512.VT (insert_subvector undef,
                            (_.info128.VT _.info128.RC:$src),
                            (iPTR 0))), addr:$dst, Mask),
          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
                 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
                 _.info128.RC:$src)>;

}

multiclass avx512_store_scalar_lowering_subreg<string InstrStr,
                                               AVX512VLVectorVTInfo _,
                                               dag Mask, RegisterClass MaskRC,
                                               SubRegIndex subreg> {

def : Pat<(masked_store
             (_.info512.VT (insert_subvector undef,
                            (_.info128.VT _.info128.RC:$src),
                            (iPTR 0))), addr:$dst, Mask),
          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
                 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                 _.info128.RC:$src)>;

}

// This matches the more recent codegen from clang that avoids emitting a
// 512-bit masked store directly. Codegen will widen a 128-bit masked store
// to 512 bits on AVX512F-only targets.
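// For instance (illustrative), clang expands _mm_mask_store_ss(p, k, v) into
// a 128-bit llvm.masked.store whose <4 x i1> mask has only bit 0 significant;
// the subreg patterns below fold that shape into the mrk (VMOVSSZmrk) form.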
multiclass avx512_store_scalar_lowering_subreg2<string InstrStr,
                                                AVX512VLVectorVTInfo _,
                                                dag Mask512, dag Mask128,
                                                RegisterClass MaskRC,
                                                SubRegIndex subreg> {

// AVX512F pattern.
def : Pat<(masked_store
             (_.info512.VT (insert_subvector undef,
                            (_.info128.VT _.info128.RC:$src),
                            (iPTR 0))), addr:$dst, Mask512),
          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
                 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                 _.info128.RC:$src)>;

// AVX512VL pattern.
def : Pat<(masked_store (_.info128.VT _.info128.RC:$src), addr:$dst, Mask128),
          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
                 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                 _.info128.RC:$src)>;
}

multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
                                       dag Mask, RegisterClass MaskRC> {

def : Pat<(_.info128.VT (extract_subvector
                          (_.info512.VT (masked_load addr:$srcAddr, Mask,
                                         _.info512.ImmAllZerosV)),
                          (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmkz)
                 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
                 addr:$srcAddr)>;

def : Pat<(_.info128.VT (extract_subvector
                          (_.info512.VT (masked_load addr:$srcAddr, Mask,
                                         (_.info512.VT (insert_subvector undef,
                                          (_.info128.VT (X86vzmovl _.info128.RC:$src)),
                                          (iPTR 0))))),
                          (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
                 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
                 addr:$srcAddr)>;

}

multiclass avx512_load_scalar_lowering_subreg<string InstrStr,
                                              AVX512VLVectorVTInfo _,
                                              dag Mask, RegisterClass MaskRC,
                                              SubRegIndex subreg> {

def : Pat<(_.info128.VT (extract_subvector
                          (_.info512.VT (masked_load addr:$srcAddr, Mask,
                                         _.info512.ImmAllZerosV)),
                          (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmkz)
                 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                 addr:$srcAddr)>;

def : Pat<(_.info128.VT (extract_subvector
                          (_.info512.VT (masked_load addr:$srcAddr, Mask,
                                         (_.info512.VT (insert_subvector undef,
                                          (_.info128.VT (X86vzmovl _.info128.RC:$src)),
                                          (iPTR 0))))),
                          (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
                 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                 addr:$srcAddr)>;

}

// This matches the more recent codegen from clang that avoids emitting a
// 512-bit masked load directly. Codegen will widen a 128-bit masked load
// to 512 bits on AVX512F-only targets.
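// Likewise (illustrative), _mm_mask_load_ss(src, k, p) becomes a 128-bit
// llvm.masked.load whose passthru has its upper elements zeroed (the
// X86vzmovl shape matched below), and selects to the rmk/rmkz forms.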
multiclass avx512_load_scalar_lowering_subreg2<string InstrStr,
                                               AVX512VLVectorVTInfo _,
                                               dag Mask512, dag Mask128,
                                               RegisterClass MaskRC,
                                               SubRegIndex subreg> {
// AVX512F patterns.
def : Pat<(_.info128.VT (extract_subvector
                          (_.info512.VT (masked_load addr:$srcAddr, Mask512,
                                         _.info512.ImmAllZerosV)),
                          (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmkz)
                 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                 addr:$srcAddr)>;

def : Pat<(_.info128.VT (extract_subvector
                          (_.info512.VT (masked_load addr:$srcAddr, Mask512,
                                         (_.info512.VT (insert_subvector undef,
                                          (_.info128.VT (X86vzmovl _.info128.RC:$src)),
                                          (iPTR 0))))),
                          (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
                 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                 addr:$srcAddr)>;

// AVX512VL patterns.
def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
                         _.info128.ImmAllZerosV)),
          (!cast<Instruction>(InstrStr#rmkz)
                 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                 addr:$srcAddr)>;

def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
                         (_.info128.VT (X86vzmovl _.info128.RC:$src)))),
          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
                 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                 addr:$srcAddr)>;
}

defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;

defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
                 (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
                 (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
                 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;

defm : avx512_store_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
                 (v16i1 (insert_subvector
                         (v16i1 immAllZerosV),
                         (v4i1 (extract_subvector
                                (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                                (iPTR 0))),
                         (iPTR 0))),
                 (v4i1 (extract_subvector
                        (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                        (iPTR 0))), GR8, sub_8bit>;
defm : avx512_store_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
                 (v8i1
                  (extract_subvector
                   (v16i1
                    (insert_subvector
                     (v16i1 immAllZerosV),
                     (v2i1 (extract_subvector
                            (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                            (iPTR 0))),
                     (iPTR 0))),
                   (iPTR 0))),
                 (v2i1 (extract_subvector
                        (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                        (iPTR 0))), GR8, sub_8bit>;

defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
                 (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
                 (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
                 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;

defm : avx512_load_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
                 (v16i1 (insert_subvector
                         (v16i1 immAllZerosV),
                         (v4i1 (extract_subvector
                                (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                                (iPTR 0))),
                         (iPTR 0))),
                 (v4i1 (extract_subvector
                        (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                        (iPTR 0))), GR8, sub_8bit>;
defm : avx512_load_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
                 (v8i1
                  (extract_subvector
                   (v16i1
                    (insert_subvector
                     (v16i1 immAllZerosV),
                     (v2i1 (extract_subvector
                            (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                            (iPTR 0))),
                     (iPTR 0))),
                   (iPTR 0))),
                 (v2i1 (extract_subvector
                        (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                        (iPTR 0))), GR8, sub_8bit>;

def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrk
                             (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
                             VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
                             (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;

def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), fp32imm0)),
          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrkz VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
                             (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;

def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), (f32 FR32X:$src0))),
          (COPY_TO_REGCLASS
           (v4f32 (VMOVSSZrmk (v4f32 (COPY_TO_REGCLASS FR32X:$src0, VR128X)),
                              VK1WM:$mask, addr:$src)),
           FR32X)>;
def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), fp32imm0)),
          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrmkz VK1WM:$mask, addr:$src)), FR32X)>;

def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrk
                             (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
                             VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
                             (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;

def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fp64imm0)),
          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrkz VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
                             (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;

def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), (f64 FR64X:$src0))),
          (COPY_TO_REGCLASS
           (v2f64 (VMOVSDZrmk (v2f64 (COPY_TO_REGCLASS FR64X:$src0, VR128X)),
                              VK1WM:$mask, addr:$src)),
           FR64X)>;
def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), fp64imm0)),
          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrmkz VK1WM:$mask, addr:$src)), FR64X)>;

let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
  def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                            (ins VR128X:$src1, VR128X:$src2),
                            "vmovss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                            []>, XS, EVEX_4V, VEX_LIG,
                            FoldGenData<"VMOVSSZrr">,
                            Sched<[SchedWriteFShuffle.XMM]>;

  let Constraints = "$src0 = $dst" in
  def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                             (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask,
                                  VR128X:$src1, VR128X:$src2),
                             "vmovss\t{$src2, $src1, $dst {${mask}}|"#
                               "$dst {${mask}}, $src1, $src2}",
                             []>, EVEX_K, XS, EVEX_4V, VEX_LIG,
                             FoldGenData<"VMOVSSZrrk">,
                             Sched<[SchedWriteFShuffle.XMM]>;

  def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                              (ins f32x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
                              "vmovss\t{$src2, $src1, $dst {${mask}} {z}|"#
                                "$dst {${mask}} {z}, $src1, $src2}",
                              []>, EVEX_KZ, XS, EVEX_4V, VEX_LIG,
                              FoldGenData<"VMOVSSZrrkz">,
                              Sched<[SchedWriteFShuffle.XMM]>;

  def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                            (ins VR128X:$src1, VR128X:$src2),
                            "vmovsd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                            []>, XD, EVEX_4V, VEX_LIG, VEX_W,
                            FoldGenData<"VMOVSDZrr">,
                            Sched<[SchedWriteFShuffle.XMM]>;

  let Constraints = "$src0 = $dst" in
  def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                             (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask,
                                  VR128X:$src1, VR128X:$src2),
                             "vmovsd\t{$src2, $src1, $dst {${mask}}|"#
                               "$dst {${mask}}, $src1, $src2}",
                             []>, EVEX_K, XD, EVEX_4V, VEX_LIG,
                             VEX_W, FoldGenData<"VMOVSDZrrk">,
                             Sched<[SchedWriteFShuffle.XMM]>;

  def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                              (ins f64x_info.KRCWM:$mask, VR128X:$src1,
                                   VR128X:$src2),
                              "vmovsd\t{$src2, $src1, $dst {${mask}} {z}|"#
                                "$dst {${mask}} {z}, $src1, $src2}",
                              []>, EVEX_KZ, XD, EVEX_4V, VEX_LIG,
                              VEX_W, FoldGenData<"VMOVSDZrrkz">,
                              Sched<[SchedWriteFShuffle.XMM]>;
}

def : InstAlias<"vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VMOVSSZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}}|"#
                  "$dst {${mask}}, $src1, $src2}",
                (VMOVSSZrrk_REV VR128X:$dst, VK1WM:$mask,
                                VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"#
                  "$dst {${mask}} {z}, $src1, $src2}",
                (VMOVSSZrrkz_REV VR128X:$dst, VK1WM:$mask,
                                 VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VMOVSDZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}}|"#
                  "$dst {${mask}}, $src1, $src2}",
                (VMOVSDZrrk_REV VR128X:$dst, VK1WM:$mask,
                                VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"#
                  "$dst {${mask}} {z}, $src1, $src2}",
                (VMOVSDZrrkz_REV VR128X:$dst, VK1WM:$mask,
                                 VR128X:$src1, VR128X:$src2), 0>;

let Predicates = [HasAVX512, OptForSize] in {
  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
            (VMOVSSZrr (v4f32 (AVX512_128_SET0)), VR128X:$src)>;
  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
            (VMOVSSZrr (v4i32 (AVX512_128_SET0)), VR128X:$src)>;

  // Move low f32 and clear high bits.
  def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
              (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))), sub_xmm)>;
  def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
              (v4i32 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)))), sub_xmm)>;

  def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
              (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))), sub_xmm)>;
  def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
              (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)))), sub_xmm)>;
}

// Use 128-bit blends for OptForSpeed since BLENDs have better throughput than
// VMOVSS/SD. Unfortunately, this loses the ability to use XMM16-31.
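// Note on the immediates below (illustrative): for VBLENDPS, imm 0x1 takes
// only element 0 from the second source; VPBLENDW needs imm 0x3 because its
// immediate selects 16-bit lanes, two of which cover the low i32 element.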
let Predicates = [HasAVX512, OptForSpeed] in {
  def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VBLENDPSrri (v4f32 (V_SET0)),
                    (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)),
                    (i8 1))), sub_xmm)>;
  def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VPBLENDWrri (v4i32 (V_SET0)),
                    (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)),
                    (i8 3))), sub_xmm)>;
}

let Predicates = [HasAVX512] in {
  def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
            (VMOVSSZrm addr:$src)>;
  def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
            (VMOVSDZrm addr:$src)>;

  // The same patterns as above, in the form they appear in for
  // 256-bit types.
  def : Pat<(v8f32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v4f64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;

  // The same patterns as above, in the form they appear in for
  // 512-bit types.
  def : Pat<(v16f32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v8f64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
}

let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
                                  (ins VR128X:$src),
                                  "vmovq\t{$src, $dst|$dst, $src}",
                                  [(set VR128X:$dst, (v2i64 (X86vzmovl
                                                     (v2i64 VR128X:$src))))]>,
                                  EVEX, VEX_W;
}

let Predicates = [HasAVX512] in {
  def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
            (VMOVDI2PDIZrr GR32:$src)>;

  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
            (VMOV64toPQIZrr GR64:$src)>;

  // AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
            (VMOVDI2PDIZrm addr:$src)>;
  def : Pat<(v4i32 (X86vzload32 addr:$src)),
            (VMOVDI2PDIZrm addr:$src)>;
  def : Pat<(v8i32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
  def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
            (VMOVZPQILo2PQIZrr VR128X:$src)>;
  def : Pat<(v2i64 (X86vzload64 addr:$src)),
            (VMOVQI2PQIZrm addr:$src)>;
  def : Pat<(v4i64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;

  // Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
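  // E.g. (illustrative) inserting a loaded i32 into element 0 of a zero
  // v16i32 only needs a 128-bit vmovd: the instruction already zeroes the
  // rest of the register, which the SUBREG_TO_REG below asserts.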
  def : Pat<(v16i32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
  def : Pat<(v8i64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;

  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2f64 (VMOVZPQILo2PQIZrr
                     (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))),
             sub_xmm)>;
  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2i64 (VMOVZPQILo2PQIZrr
                     (v2i64 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)))),
             sub_xmm)>;

  def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2f64 (VMOVZPQILo2PQIZrr
                     (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))),
             sub_xmm)>;
  def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2i64 (VMOVZPQILo2PQIZrr
                     (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)))),
             sub_xmm)>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - Non-temporals
//===----------------------------------------------------------------------===//

def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
                            (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
                            [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.ZMM.RM]>,
                            EVEX, T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>;

let Predicates = [HasVLX] in {
  def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
                                 (ins i256mem:$src),
                                 "vmovntdqa\t{$src, $dst|$dst, $src}",
                                 [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.YMM.RM]>,
                                 EVEX, T8PD, EVEX_V256, EVEX_CD8<64, CD8VF>;

  def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
                                 (ins i128mem:$src),
                                 "vmovntdqa\t{$src, $dst|$dst, $src}",
                                 [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.XMM.RM]>,
                                 EVEX, T8PD, EVEX_V128, EVEX_CD8<64, CD8VF>;
}

multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                        X86SchedWriteMoveLS Sched,
                        PatFrag st_frag = alignednontemporalstore> {
  let SchedRW = [Sched.MR], AddedComplexity = 400 in
  def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(st_frag (_.VT _.RC:$src), addr:$dst)],
                    _.ExeDomain>, EVEX, EVEX_CD8<_.EltSize, CD8VF>;
}

multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
                           AVX512VLVectorVTInfo VTInfo,
                           X86SchedWriteMoveLSWidths Sched> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512, Sched.ZMM>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256, Sched.YMM>, EVEX_V256;
    defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128, Sched.XMM>, EVEX_V128;
  }
}

defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info,
                                SchedWriteVecMoveLSNT>, PD;
defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info,
                                SchedWriteFMoveLSNT>, PD, VEX_W;
defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info,
                                SchedWriteFMoveLSNT>, PS;
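// For reference (illustrative), these correspond to the streaming intrinsics
// such as _mm512_stream_si512 / _mm512_stream_load_si512; non-temporal
// accesses require the full vector alignment (64 bytes for ZMM).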
let Predicates = [HasAVX512], AddedComplexity = 400 in {
  def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
  def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
  def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;

  def : Pat<(v8f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v16f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v8i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v16i32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v32i16 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v64i8 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
}

let Predicates = [HasVLX], AddedComplexity = 400 in {
  def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignednontemporalstore (v16i16 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignednontemporalstore (v32i8 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;

  def : Pat<(v4f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v8f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v4i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v8i32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v16i16 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v32i8 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;

  def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignednontemporalstore (v8i16 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignednontemporalstore (v16i8 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;

  def : Pat<(v2f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v4f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v2i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v4i32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v8i16 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v16i8 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - Integer arithmetic
//
multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86VectorVTInfo _, X86FoldableSchedWrite sched,
                           bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
                            (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                            IsCommutable, IsCommutable>, AVX512BIBase, EVEX_4V,
                            Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                            (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
                            (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2)))>,
                            AVX512BIBase, EVEX_4V,
                            Sched<[sched.Folded, sched.ReadAfterFold]>;
}
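// Via AVX512_maskable, each avx512_binop_rm instantiation also gets merge-
// and zero-masked siblings (rrk/rrkz, rmk/rmkz), e.g. (illustrative)
// VPADDDZrr is accompanied by "vpaddd ... {%k1}" and "... {%k1} {z}" forms.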
multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
                            bit IsCommutable = 0> :
           avx512_binop_rm<opc, OpcodeStr, OpNode, _, sched, IsCommutable> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                             (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                             "${src2}"##_.BroadcastStr##", $src1",
                             "$src1, ${src2}"##_.BroadcastStr,
                             (_.VT (OpNode _.RC:$src1,
                                    (_.BroadcastLdFrag addr:$src2)))>,
                             AVX512BIBase, EVEX_4V, EVEX_B,
                             Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              AVX512VLVectorVTInfo VTInfo,
                              X86SchedWriteWidths sched, Predicate prd,
                              bit IsCommutable = 0> {
  let Predicates = [prd] in
  defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
                           IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256,
                                sched.YMM, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128,
                                sched.XMM, IsCommutable>, EVEX_V128;
  }
}

multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               AVX512VLVectorVTInfo VTInfo,
                               X86SchedWriteWidths sched, Predicate prd,
                               bit IsCommutable = 0> {
  let Predicates = [prd] in
  defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
                            IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256,
                                 sched.YMM, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128,
                                 sched.XMM, IsCommutable>, EVEX_V128;
  }
}

multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
                                  sched, prd, IsCommutable>,
              VEX_W, EVEX_CD8<64, CD8VF>;
}

multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
                                  sched, prd, IsCommutable>, EVEX_CD8<32, CD8VF>;
}

multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info,
                                 sched, prd, IsCommutable>, EVEX_CD8<16, CD8VF>,
              VEX_WIG;
}

multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info,
                                 sched, prd, IsCommutable>, EVEX_CD8<8, CD8VF>,
              VEX_WIG;
}

multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd, bit IsCommutable = 0> {
  defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, sched, prd,
                                IsCommutable>;

  defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, sched, prd,
                                IsCommutable>;
}
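// Naming sketch (illustrative): the _dq and _bw wrappers append the element
// suffix, so avx512_binop_rm_vl_dq with OpcodeStr "vpadd" yields the
// VPADDD{Z,Z256,Z128} and VPADDQ{Z,Z256,Z128} families.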
multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd, bit IsCommutable = 0> {
  defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, sched, prd,
                                IsCommutable>;

  defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode, sched, prd,
                                IsCommutable>;
}

multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
                                  bits<8> opc_d, bits<8> opc_q,
                                  string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched,
                                  bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
                                    sched, HasAVX512, IsCommutable>,
              avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
                                    sched, HasBWI, IsCommutable>;
}

multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched,
                            SDNode OpNode, X86VectorVTInfo _Src,
                            X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct,
                            bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
                            (_Dst.VT (OpNode
                                      (_Src.VT _Src.RC:$src1),
                                      (_Src.VT _Src.RC:$src2))),
                            IsCommutable>,
                            AVX512BIBase, EVEX_4V, Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
                            (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
                                             (_Src.LdFrag addr:$src2)))>,
                            AVX512BIBase, EVEX_4V,
                            Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                             (ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
                             OpcodeStr,
                             "${src2}"##_Brdct.BroadcastStr##", $src1",
                             "$src1, ${src2}"##_Brdct.BroadcastStr,
                             (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
                              (_Brdct.VT (_Brdct.BroadcastLdFrag addr:$src2)))))>,
                             AVX512BIBase, EVEX_4V, EVEX_B,
                             Sched<[sched.Folded, sched.ReadAfterFold]>;
}

defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
                                    SchedWriteVecALU, 1>;
defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
                                    SchedWriteVecALU, 0>;
defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", saddsat,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", ssubsat,
                                    SchedWriteVecALU, HasBWI, 0>;
defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", uaddsat,
                                     SchedWriteVecALU, HasBWI, 1>;
defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", usubsat,
                                     SchedWriteVecALU, HasBWI, 0>;
defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
                                    SchedWritePMULLD, HasAVX512, 1>, T8PD;
defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
                                    SchedWriteVecIMul, HasBWI, 1>;
defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
                                    SchedWriteVecIMul, HasDQI, 1>, T8PD,
                                    NotEVEX2VEXConvertible;
defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SchedWriteVecIMul,
                                    HasBWI, 1>;
defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SchedWriteVecIMul,
                                     HasBWI, 1>;
defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs,
                                      SchedWriteVecIMul, HasBWI, 1>, T8PD;
defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", X86avg,
                                   SchedWriteVecALU, HasBWI, 1>;
defm VPMULDQ : avx512_binop_rm_vl_q<0x28, "vpmuldq", X86pmuldq,
                                    SchedWriteVecIMul, HasAVX512, 1>,
                                    T8PD;
defm VPMULUDQ : avx512_binop_rm_vl_q<0xF4, "vpmuludq", X86pmuludq,
                                     SchedWriteVecIMul, HasAVX512, 1>;

multiclass avx512_binop_all<bits<8> opc, string OpcodeStr,
                            X86SchedWriteWidths sched,
                            AVX512VLVectorVTInfo _SrcVTInfo,
                            AVX512VLVectorVTInfo _DstVTInfo,
                            SDNode OpNode, Predicate prd, bit IsCommutable = 0> {
  let Predicates = [prd] in
  defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
                                 _SrcVTInfo.info512, _DstVTInfo.info512,
                                 v8i64_info, IsCommutable>,
                EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
  let Predicates = [HasVLX, prd] in {
    defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
                                      _SrcVTInfo.info256, _DstVTInfo.info256,
                                      v4i64x_info, IsCommutable>,
                     EVEX_V256, EVEX_CD8<64, CD8VF>, VEX_W;
    defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
                                      _SrcVTInfo.info128, _DstVTInfo.info128,
                                      v2i64x_info, IsCommutable>,
                     EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_W;
  }
}

defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SchedWriteVecALU,
                                       avx512vl_i8_info, avx512vl_i8_info,
                                       X86multishift, HasVBMI, 0>, T8PD;

multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86VectorVTInfo _Src, X86VectorVTInfo _Dst,
                            X86FoldableSchedWrite sched> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                             (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
                             OpcodeStr,
                             "${src2}"##_Src.BroadcastStr##", $src1",
                             "$src1, ${src2}"##_Src.BroadcastStr,
                             (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
                              (_Src.VT (_Src.BroadcastLdFrag addr:$src2)))))>,
                             EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>,
                             Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
                           SDNode OpNode, X86VectorVTInfo _Src,
                           X86VectorVTInfo _Dst, X86FoldableSchedWrite sched,
                           bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
                            (_Dst.VT (OpNode
                                      (_Src.VT _Src.RC:$src1),
                                      (_Src.VT _Src.RC:$src2))),
                            IsCommutable, IsCommutable>,
                            EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V, Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
                            (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
                                             (_Src.LdFrag addr:$src2)))>,
                            EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>,
                            Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode> {
  let Predicates = [HasBWI] in
  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i32_info,
                                v32i16_info, SchedWriteShuffle.ZMM>,
                avx512_packs_rmb<opc, OpcodeStr, OpNode, v16i32_info,
                                 v32i16_info, SchedWriteShuffle.ZMM>, EVEX_V512;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i32x_info,
                                     v16i16x_info, SchedWriteShuffle.YMM>,
                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v8i32x_info,
                                      v16i16x_info, SchedWriteShuffle.YMM>,
                     EVEX_V256;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v4i32x_info,
                                     v8i16x_info, SchedWriteShuffle.XMM>,
                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v4i32x_info,
                                      v8i16x_info, SchedWriteShuffle.XMM>,
                     EVEX_V128;
  }
}
multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
                                   SDNode OpNode> {
  let Predicates = [HasBWI] in
  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info, v64i8_info,
                                SchedWriteShuffle.ZMM>, EVEX_V512, VEX_WIG;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info,
                                     v32i8x_info, SchedWriteShuffle.YMM>,
                     EVEX_V256, VEX_WIG;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info,
                                     v16i8x_info, SchedWriteShuffle.XMM>,
                     EVEX_V128, VEX_WIG;
  }
}

multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr,
                         SDNode OpNode, AVX512VLVectorVTInfo _Src,
                         AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> {
  let Predicates = [HasBWI] in
  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512,
                                _Dst.info512, SchedWriteVecIMul.ZMM,
                                IsCommutable>, EVEX_V512;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256,
                                     _Dst.info256, SchedWriteVecIMul.YMM,
                                     IsCommutable>, EVEX_V256;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128,
                                     _Dst.info128, SchedWriteVecIMul.XMM,
                                     IsCommutable>, EVEX_V128;
  }
}

defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, AVX512BIBase;
defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, AVX5128IBase;
defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase;
defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase;

defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
                                avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8PD, VEX_WIG;
defm VPMADDWD : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
                              avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase, VEX_WIG;

defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax,
                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMAXSD : avx512_binop_rm_vl_d<0x3D, "vpmaxsd", smax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
defm VPMAXSQ : avx512_binop_rm_vl_q<0x3D, "vpmaxsq", smax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
                                    NotEVEX2VEXConvertible;

defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax,
                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMAXUD : avx512_binop_rm_vl_d<0x3F, "vpmaxud", umax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
defm VPMAXUQ : avx512_binop_rm_vl_q<0x3F, "vpmaxuq", umax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
                                    NotEVEX2VEXConvertible;

defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin,
                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMINSD : avx512_binop_rm_vl_d<0x39, "vpminsd", smin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
defm VPMINSQ : avx512_binop_rm_vl_q<0x39, "vpminsq", smin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
                                    NotEVEX2VEXConvertible;

defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin,
                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMINUD : avx512_binop_rm_vl_d<0x3B, "vpminud", umin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
defm VPMINUQ : avx512_binop_rm_vl_q<0x3B, "vpminuq", umin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
                                    NotEVEX2VEXConvertible;

// PMULLQ: Use the 512-bit version to implement the 128/256-bit versions
// when VLX is not available (NoVLX).
let Predicates = [HasDQI, NoVLX] in {
  def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG
              (VPMULLQZrr
                (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
              sub_ymm)>;
  def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
            (EXTRACT_SUBREG
              (VPMULLQZrmb
                (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                addr:$src2),
              sub_ymm)>;

  def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG
              (VPMULLQZrr
                (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
              sub_xmm)>;
  def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
            (EXTRACT_SUBREG
              (VPMULLQZrmb
                (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                addr:$src2),
              sub_xmm)>;
}

multiclass avx512_min_max_lowering<string Instr, SDNode OpNode> {
  def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)),
            (EXTRACT_SUBREG
              (!cast<Instruction>(Instr#"rr")
                (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
              sub_ymm)>;
  def : Pat<(v4i64 (OpNode (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
            (EXTRACT_SUBREG
              (!cast<Instruction>(Instr#"rmb")
                (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                addr:$src2),
              sub_ymm)>;

  def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)),
            (EXTRACT_SUBREG
              (!cast<Instruction>(Instr#"rr")
                (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
              sub_xmm)>;
  def : Pat<(v2i64 (OpNode (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
            (EXTRACT_SUBREG
              (!cast<Instruction>(Instr#"rmb")
                (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                addr:$src2),
              sub_xmm)>;
}

let Predicates = [HasAVX512, NoVLX] in {
  defm : avx512_min_max_lowering<"VPMAXUQZ", umax>;
  defm : avx512_min_max_lowering<"VPMINUQZ", umin>;
  defm : avx512_min_max_lowering<"VPMAXSQZ", smax>;
  defm : avx512_min_max_lowering<"VPMINSQZ", smin>;
}
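// The widening idiom above (illustrative): INSERT_SUBREG the XMM/YMM operand
// into an IMPLICIT_DEF ZMM, run the 512-bit instruction, then EXTRACT_SUBREG
// the low subvector back out; for these element-wise ops the undefined upper
// lanes cannot leak into the low-lane results.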
//===----------------------------------------------------------------------===//
// AVX-512  Logical Instructions
//===----------------------------------------------------------------------===//

defm VPAND : avx512_binop_rm_vl_dq<0xDB, 0xDB, "vpand", and,
                                   SchedWriteVecLogic, HasAVX512, 1>;
defm VPOR : avx512_binop_rm_vl_dq<0xEB, 0xEB, "vpor", or,
                                  SchedWriteVecLogic, HasAVX512, 1>;
defm VPXOR : avx512_binop_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
                                   SchedWriteVecLogic, HasAVX512, 1>;
defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
                                    SchedWriteVecLogic, HasAVX512>;

let Predicates = [HasVLX] in {
  def : Pat<(v16i8 (and VR128X:$src1, VR128X:$src2)),
            (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (and VR128X:$src1, VR128X:$src2)),
            (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(v16i8 (or VR128X:$src1, VR128X:$src2)),
            (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (or VR128X:$src1, VR128X:$src2)),
            (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(v16i8 (xor VR128X:$src1, VR128X:$src2)),
            (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (xor VR128X:$src1, VR128X:$src2)),
            (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(v16i8 (X86andnp VR128X:$src1, VR128X:$src2)),
            (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (X86andnp VR128X:$src1, VR128X:$src2)),
            (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(and VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(and VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPANDQZ128rm VR128X:$src1, addr:$src2)>;

  def : Pat<(or VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPORQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(or VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPORQZ128rm VR128X:$src1, addr:$src2)>;

  def : Pat<(xor VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(xor VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPXORQZ128rm VR128X:$src1, addr:$src2)>;

  def : Pat<(X86andnp VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;

  def : Pat<(v32i8 (and VR256X:$src1, VR256X:$src2)),
            (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (and VR256X:$src1, VR256X:$src2)),
            (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;

  def : Pat<(v32i8 (or VR256X:$src1, VR256X:$src2)),
            (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (or VR256X:$src1, VR256X:$src2)),
            (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;

  def : Pat<(v32i8 (xor VR256X:$src1, VR256X:$src2)),
            (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (xor VR256X:$src1, VR256X:$src2)),
            (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;

  def : Pat<(v32i8 (X86andnp VR256X:$src1, VR256X:$src2)),
            (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (X86andnp VR256X:$src1, VR256X:$src2)),
            (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;

  def : Pat<(and VR256X:$src1, (loadv32i8 addr:$src2)),
            (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(and VR256X:$src1, (loadv16i16 addr:$src2)),
            (VPANDQZ256rm VR256X:$src1, addr:$src2)>;

  def : Pat<(or VR256X:$src1, (loadv32i8 addr:$src2)),
            (VPORQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(or VR256X:$src1, (loadv16i16 addr:$src2)),
            (VPORQZ256rm VR256X:$src1, addr:$src2)>;

  def : Pat<(xor VR256X:$src1, (loadv32i8 addr:$src2)),
            (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(xor VR256X:$src1, (loadv16i16 addr:$src2)),
            (VPXORQZ256rm VR256X:$src1, addr:$src2)>;

  def : Pat<(X86andnp VR256X:$src1, (loadv32i8 addr:$src2)),
            (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR256X:$src1, (loadv16i16 addr:$src2)),
            (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
}

let Predicates = [HasAVX512] in {
  def : Pat<(v64i8 (and VR512:$src1, VR512:$src2)),
            (VPANDQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v32i16 (and VR512:$src1, VR512:$src2)),
            (VPANDQZrr VR512:$src1, VR512:$src2)>;

  def : Pat<(v64i8 (or VR512:$src1, VR512:$src2)),
            (VPORQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v32i16 (or VR512:$src1, VR512:$src2)),
            (VPORQZrr VR512:$src1, VR512:$src2)>;

  def : Pat<(v64i8 (xor VR512:$src1, VR512:$src2)),
            (VPXORQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v32i16 (xor VR512:$src1, VR512:$src2)),
            (VPXORQZrr VR512:$src1, VR512:$src2)>;

  def : Pat<(v64i8 (X86andnp VR512:$src1, VR512:$src2)),
            (VPANDNQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v32i16 (X86andnp VR512:$src1, VR512:$src2)),
            (VPANDNQZrr VR512:$src1, VR512:$src2)>;

  def : Pat<(and VR512:$src1, (loadv64i8 addr:$src2)),
            (VPANDQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(and VR512:$src1, (loadv32i16 addr:$src2)),
            (VPANDQZrm VR512:$src1, addr:$src2)>;

  def : Pat<(or VR512:$src1, (loadv64i8 addr:$src2)),
            (VPORQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(or VR512:$src1, (loadv32i16 addr:$src2)),
            (VPORQZrm VR512:$src1, addr:$src2)>;

  def : Pat<(xor VR512:$src1, (loadv64i8 addr:$src2)),
            (VPXORQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(xor VR512:$src1, (loadv32i16 addr:$src2)),
            (VPXORQZrm VR512:$src1, addr:$src2)>;

  def : Pat<(X86andnp VR512:$src1, (loadv64i8 addr:$src2)),
            (VPANDNQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR512:$src1, (loadv32i16 addr:$src2)),
            (VPANDNQZrm VR512:$src1, addr:$src2)>;
}

// Patterns to catch a vselect whose type differs from that of the logic op.
multiclass avx512_logical_lowering<string InstrStr, SDNode OpNode,
                                   X86VectorVTInfo _,
                                   X86VectorVTInfo IntInfo> {
  // Masked register-register logical operations.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
             _.RC:$src1, _.RC:$src2)>;

  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
             _.RC:$src2)>;

  // Masked register-memory logical operations.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
                                            (load addr:$src2)))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
             _.RC:$src1, addr:$src2)>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
                                            (load addr:$src2)))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1,
             addr:$src2)>;
}

multiclass avx512_logical_lowering_bcast<string InstrStr, SDNode OpNode,
                                         X86VectorVTInfo _,
                                         X86VectorVTInfo IntInfo> {
  // Register-broadcast logical operations.
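  // E.g. (illustrative) a masked (and x, (broadcast (load scalar))) computed
  // in i32/i64 but selected under an fp-typed vselect folds onto the
  // rmbk/rmbkz forms here.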
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (bitconvert
                    (IntInfo.VT (OpNode _.RC:$src1,
                                 (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
             _.RC:$src1, addr:$src2)>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (bitconvert
                    (IntInfo.VT (OpNode _.RC:$src1,
                                 (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rmbkz) _.KRCWM:$mask,
             _.RC:$src1, addr:$src2)>;
}

multiclass avx512_logical_lowering_sizes<string InstrStr, SDNode OpNode,
                                         AVX512VLVectorVTInfo SelectInfo,
                                         AVX512VLVectorVTInfo IntInfo> {
let Predicates = [HasVLX] in {
  defm : avx512_logical_lowering<InstrStr#"Z128", OpNode, SelectInfo.info128,
                                 IntInfo.info128>;
  defm : avx512_logical_lowering<InstrStr#"Z256", OpNode, SelectInfo.info256,
                                 IntInfo.info256>;
}
let Predicates = [HasAVX512] in {
  defm : avx512_logical_lowering<InstrStr#"Z", OpNode, SelectInfo.info512,
                                 IntInfo.info512>;
}
}

multiclass avx512_logical_lowering_sizes_bcast<string InstrStr, SDNode OpNode,
                                               AVX512VLVectorVTInfo SelectInfo,
                                               AVX512VLVectorVTInfo IntInfo> {
let Predicates = [HasVLX] in {
  defm : avx512_logical_lowering_bcast<InstrStr#"Z128", OpNode,
                                       SelectInfo.info128, IntInfo.info128>;
  defm : avx512_logical_lowering_bcast<InstrStr#"Z256", OpNode,
                                       SelectInfo.info256, IntInfo.info256>;
}
let Predicates = [HasAVX512] in {
  defm : avx512_logical_lowering_bcast<InstrStr#"Z", OpNode,
                                       SelectInfo.info512, IntInfo.info512>;
}
}

multiclass avx512_logical_lowering_types<string InstrStr, SDNode OpNode> {
  // i64 vselect with i32/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
                                       avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
                                       avx512vl_i8_info>;

  // i32 vselect with i64/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
                                       avx512vl_i64_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
                                       avx512vl_i8_info>;

  // f32 vselect with i64/i32/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i64_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i8_info>;

  // f64 vselect with i64/i32/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i64_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i8_info>;

  defm : avx512_logical_lowering_sizes_bcast<InstrStr#"D", OpNode,
                                             avx512vl_f32_info,
                                             avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes_bcast<InstrStr#"Q", OpNode,
                                             avx512vl_f64_info,
                                             avx512vl_i64_info>;
}

defm : avx512_logical_lowering_types<"VPAND", and>;
defm : avx512_logical_lowering_types<"VPOR", or>;
defm : avx512_logical_lowering_types<"VPXOR", xor>;
defm : avx512_logical_lowering_types<"VPANDN", X86andnp>;

//===----------------------------------------------------------------------===//
// AVX-512  FP arithmetic
//===----------------------------------------------------------------------===//

multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                            SDNode OpNode, SDNode VecNode,
                            X86FoldableSchedWrite sched, bit IsCommutable> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
                           Sched<[sched]>;

  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (_.VT (VecNode _.RC:$src1,
                                          _.ScalarIntMemCPat:$src2))>,
                           Sched<[sched.Folded, sched.ReadAfterFold]>;
  let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
              (ins _.FRC:$src1, _.FRC:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
              Sched<[sched]> {
    let isCommutable = IsCommutable;
  }
  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
              (ins _.FRC:$src1, _.ScalarMemOp:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                                 (_.ScalarLdFrag addr:$src2)))]>,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
  }
}

multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                  SDNode VecNode, X86FoldableSchedWrite sched,
                                  bit IsCommutable = 0> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
                           "$rc, $src2, $src1", "$src1, $src2, $rc",
                           (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                    (i32 timm:$rc))>,
                           EVEX_B, EVEX_RC, Sched<[sched]>;
}
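// Sketch (illustrative): rrb_Int above is the embedded-rounding form used by
// the *_round_ss/sd intrinsics, e.g. _mm_add_round_ss(a, b,
// _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC), where $rc carries the
// rounding-mode immediate.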
_.FRC:$src2), 5360 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", 5361 [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>, 5362 Sched<[sched]>, 5363 EVEX2VEXOverride<EVEX2VexOvrd#"rr"> { 5364 let isCommutable = IsCommutable; 5365 } 5366 def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst), 5367 (ins _.FRC:$src1, _.ScalarMemOp:$src2), 5368 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", 5369 [(set _.FRC:$dst, (OpNode _.FRC:$src1, 5370 (_.ScalarLdFrag addr:$src2)))]>, 5371 Sched<[sched.Folded, sched.ReadAfterFold]>, 5372 EVEX2VEXOverride<EVEX2VexOvrd#"rm">; 5373 } 5374 5375 let Uses = [MXCSR] in 5376 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 5377 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 5378 "{sae}, $src2, $src1", "$src1, $src2, {sae}", 5379 (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>, 5380 EVEX_B, Sched<[sched]>; 5381 } 5382} 5383 5384multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDNode OpNode, 5385 SDNode VecNode, SDNode RndNode, 5386 X86SchedWriteSizes sched, bit IsCommutable> { 5387 defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode, 5388 sched.PS.Scl, IsCommutable>, 5389 avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, RndNode, 5390 sched.PS.Scl, IsCommutable>, 5391 XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>; 5392 defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode, 5393 sched.PD.Scl, IsCommutable>, 5394 avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, RndNode, 5395 sched.PD.Scl, IsCommutable>, 5396 XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>; 5397} 5398 5399multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode, 5400 SDNode VecNode, SDNode SaeNode, 5401 X86SchedWriteSizes sched, bit IsCommutable> { 5402 defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode, 5403 VecNode, SaeNode, sched.PS.Scl, IsCommutable, 5404 NAME#"SS">, 5405 XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>; 5406 defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode, 5407 VecNode, SaeNode, sched.PD.Scl, IsCommutable, 5408 NAME#"SD">, 5409 XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>; 5410} 5411defm VADD : avx512_binop_s_round<0x58, "vadd", any_fadd, X86fadds, X86faddRnds, 5412 SchedWriteFAddSizes, 1>; 5413defm VMUL : avx512_binop_s_round<0x59, "vmul", any_fmul, X86fmuls, X86fmulRnds, 5414 SchedWriteFMulSizes, 1>; 5415defm VSUB : avx512_binop_s_round<0x5C, "vsub", any_fsub, X86fsubs, X86fsubRnds, 5416 SchedWriteFAddSizes, 0>; 5417defm VDIV : avx512_binop_s_round<0x5E, "vdiv", any_fdiv, X86fdivs, X86fdivRnds, 5418 SchedWriteFDivSizes, 0>; 5419defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminSAEs, 5420 SchedWriteFCmpSizes, 0>; 5421defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxSAEs, 5422 SchedWriteFCmpSizes, 0>; 5423 5424// MIN/MAX nodes are commutable under "unsafe-fp-math". 
// In this case we use X86fminc and X86fmaxc instead of X86fmin and X86fmax.
multiclass avx512_commutable_binop_s<bits<8> opc, string OpcodeStr,
                                     X86VectorVTInfo _, SDNode OpNode,
                                     X86FoldableSchedWrite sched,
                                     string EVEX2VEXOvrd> {
  let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
              (ins _.FRC:$src1, _.FRC:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
              Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr"> {
    let isCommutable = 1;
  }
  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
              (ins _.FRC:$src1, _.ScalarMemOp:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                                 (_.ScalarLdFrag addr:$src2)))]>,
              Sched<[sched.Folded, sched.ReadAfterFold]>,
              EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
  }
}
defm VMINCSSZ : avx512_commutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
                                          SchedWriteFCmp.Scl, "VMINCSS">, XS,
                                          EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;

defm VMINCSDZ : avx512_commutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
                                          SchedWriteFCmp.Scl, "VMINCSD">, XD,
                                          VEX_W, EVEX_4V, VEX_LIG,
                                          EVEX_CD8<64, CD8VT1>, SIMD_EXC;

defm VMAXCSSZ : avx512_commutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
                                          SchedWriteFCmp.Scl, "VMAXCSS">, XS,
                                          EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;

defm VMAXCSDZ : avx512_commutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
                                          SchedWriteFCmp.Scl, "VMAXCSD">, XD,
                                          VEX_W, EVEX_4V, VEX_LIG,
                                          EVEX_CD8<64, CD8VT1>, SIMD_EXC;

multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
                            bit IsCommutable,
                            bit IsKCommutable = IsCommutable> {
  let ExeDomain = _.ExeDomain, hasSideEffects = 0,
      Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1, _.RC:$src2)), IsCommutable,
                  IsKCommutable, IsKCommutable>,
                  EVEX_4V, Sched<[sched]>;
  let mayLoad = 1 in {
    defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
                    "$src2, $src1", "$src1, $src2",
                    (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
                    EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
    defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
                    "${src2}"##_.BroadcastStr##", $src1",
                    "$src1, ${src2}"##_.BroadcastStr,
                    (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>,
                    EVEX_4V, EVEX_B,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
  }
}

multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr,
                                  SDPatternOperator OpNodeRnd,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
  defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr##_.Suffix,
                  "$rc, $src2, $src1", "$src1, $src2, $rc",
                  (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 timm:$rc)))>,
                  EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
}

multiclass
avx512_fp_sae_packed<bits<8> opc, string OpcodeStr, 5506 SDPatternOperator OpNodeSAE, 5507 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 5508 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in 5509 defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 5510 (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix, 5511 "{sae}, $src2, $src1", "$src1, $src2, {sae}", 5512 (_.VT (OpNodeSAE _.RC:$src1, _.RC:$src2))>, 5513 EVEX_4V, EVEX_B, Sched<[sched]>; 5514} 5515 5516multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 5517 Predicate prd, X86SchedWriteSizes sched, 5518 bit IsCommutable = 0, 5519 bit IsPD128Commutable = IsCommutable> { 5520 let Predicates = [prd] in { 5521 defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v16f32_info, 5522 sched.PS.ZMM, IsCommutable>, EVEX_V512, PS, 5523 EVEX_CD8<32, CD8VF>; 5524 defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f64_info, 5525 sched.PD.ZMM, IsCommutable>, EVEX_V512, PD, VEX_W, 5526 EVEX_CD8<64, CD8VF>; 5527 } 5528 5529 // Define only if AVX512VL feature is present. 5530 let Predicates = [prd, HasVLX] in { 5531 defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f32x_info, 5532 sched.PS.XMM, IsCommutable>, EVEX_V128, PS, 5533 EVEX_CD8<32, CD8VF>; 5534 defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f32x_info, 5535 sched.PS.YMM, IsCommutable>, EVEX_V256, PS, 5536 EVEX_CD8<32, CD8VF>; 5537 defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v2f64x_info, 5538 sched.PD.XMM, IsPD128Commutable, 5539 IsCommutable>, EVEX_V128, PD, VEX_W, 5540 EVEX_CD8<64, CD8VF>; 5541 defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f64x_info, 5542 sched.PD.YMM, IsCommutable>, EVEX_V256, PD, VEX_W, 5543 EVEX_CD8<64, CD8VF>; 5544 } 5545} 5546 5547let Uses = [MXCSR] in 5548multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd, 5549 X86SchedWriteSizes sched> { 5550 defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM, 5551 v16f32_info>, 5552 EVEX_V512, PS, EVEX_CD8<32, CD8VF>; 5553 defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM, 5554 v8f64_info>, 5555 EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>; 5556} 5557 5558let Uses = [MXCSR] in 5559multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd, 5560 X86SchedWriteSizes sched> { 5561 defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM, 5562 v16f32_info>, 5563 EVEX_V512, PS, EVEX_CD8<32, CD8VF>; 5564 defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM, 5565 v8f64_info>, 5566 EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>; 5567} 5568 5569defm VADD : avx512_fp_binop_p<0x58, "vadd", any_fadd, HasAVX512, 5570 SchedWriteFAddSizes, 1>, 5571 avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>; 5572defm VMUL : avx512_fp_binop_p<0x59, "vmul", any_fmul, HasAVX512, 5573 SchedWriteFMulSizes, 1>, 5574 avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>; 5575defm VSUB : avx512_fp_binop_p<0x5C, "vsub", any_fsub, HasAVX512, 5576 SchedWriteFAddSizes>, 5577 avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>; 5578defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", any_fdiv, HasAVX512, 5579 SchedWriteFDivSizes>, 5580 avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>; 5581defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512, 5582 SchedWriteFCmpSizes, 0>, 5583 avx512_fp_binop_p_sae<0x5D, "vmin", X86fminSAE, SchedWriteFCmpSizes>; 5584defm VMAX : avx512_fp_binop_p<0x5F, "vmax", 
X86fmax, HasAVX512, 5585 SchedWriteFCmpSizes, 0>, 5586 avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxSAE, SchedWriteFCmpSizes>; 5587let isCodeGenOnly = 1 in { 5588 defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, HasAVX512, 5589 SchedWriteFCmpSizes, 1>; 5590 defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, HasAVX512, 5591 SchedWriteFCmpSizes, 1>; 5592} 5593let Uses = []<Register>, mayRaiseFPException = 0 in { 5594defm VAND : avx512_fp_binop_p<0x54, "vand", null_frag, HasDQI, 5595 SchedWriteFLogicSizes, 1>; 5596defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, HasDQI, 5597 SchedWriteFLogicSizes, 0>; 5598defm VOR : avx512_fp_binop_p<0x56, "vor", null_frag, HasDQI, 5599 SchedWriteFLogicSizes, 1>; 5600defm VXOR : avx512_fp_binop_p<0x57, "vxor", null_frag, HasDQI, 5601 SchedWriteFLogicSizes, 1>; 5602} 5603 5604multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode, 5605 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 5606 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in { 5607 defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 5608 (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix, 5609 "$src2, $src1", "$src1, $src2", 5610 (_.VT (OpNode _.RC:$src1, _.RC:$src2))>, 5611 EVEX_4V, Sched<[sched]>; 5612 defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 5613 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix, 5614 "$src2, $src1", "$src1, $src2", 5615 (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>, 5616 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>; 5617 defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 5618 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix, 5619 "${src2}"##_.BroadcastStr##", $src1", 5620 "$src1, ${src2}"##_.BroadcastStr, 5621 (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>, 5622 EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 5623 } 5624} 5625 5626multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode, 5627 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 5628 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in { 5629 defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 5630 (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix, 5631 "$src2, $src1", "$src1, $src2", 5632 (_.VT (OpNode _.RC:$src1, _.RC:$src2))>, 5633 Sched<[sched]>; 5634 defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), 5635 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr##_.Suffix, 5636 "$src2, $src1", "$src1, $src2", 5637 (OpNode _.RC:$src1, _.ScalarIntMemCPat:$src2)>, 5638 Sched<[sched.Folded, sched.ReadAfterFold]>; 5639 } 5640} 5641 5642multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr, 5643 X86SchedWriteWidths sched> { 5644 defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v16f32_info>, 5645 avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v16f32_info>, 5646 EVEX_V512, EVEX_CD8<32, CD8VF>; 5647 defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v8f64_info>, 5648 avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v8f64_info>, 5649 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; 5650 defm SSZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f32x_info>, 5651 avx512_fp_scalar_round<opcScaler, OpcodeStr##"ss", f32x_info, 5652 X86scalefsRnd, sched.Scl>, 5653 EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>; 5654 defm SDZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f64x_info>, 5655 
avx512_fp_scalar_round<opcScaler, OpcodeStr##"sd", f64x_info,
                                          X86scalefsRnd, sched.Scl>,
                                          EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>, VEX_W;

  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
  defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v4f32x_info>,
                                   EVEX_V128, EVEX_CD8<32, CD8VF>;
  defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v8f32x_info>,
                                   EVEX_V256, EVEX_CD8<32, CD8VF>;
  defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v2f64x_info>,
                                   EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
  defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v4f64x_info>,
                                   EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
  }
}
defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef",
                                    SchedWriteFAdd>, T8PD, NotEVEX2VEXConvertible;

//===----------------------------------------------------------------------===//
// AVX-512  VPTESTM instructions
//===----------------------------------------------------------------------===//

multiclass avx512_vptest<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _,
                         string Name> {
  // NOTE: Patterns are omitted in favor of manual selection in X86ISelDAGToDAG.
  // There are just too many permutations due to commutability and bitcasts.
  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                   "$src2, $src1", "$src1, $src2",
                   (null_frag), (null_frag), 1>,
                   EVEX_4V, Sched<[sched]>;
  let mayLoad = 1 in
  defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                   "$src2, $src1", "$src1, $src2",
                   (null_frag), (null_frag)>,
                   EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
  defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                    "${src2}"##_.BroadcastStr##", $src1",
                    "$src1, ${src2}"##_.BroadcastStr,
                    (null_frag), (null_frag)>,
                    EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr,
                                  X86SchedWriteWidths sched,
                                  AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_vptest<opc, OpcodeStr, sched.ZMM, _.info512, NAME>,
           avx512_vptest_mb<opc, OpcodeStr, sched.ZMM, _.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
  defm Z256 : avx512_vptest<opc, OpcodeStr, sched.YMM, _.info256, NAME>,
              avx512_vptest_mb<opc, OpcodeStr, sched.YMM, _.info256>, EVEX_V256;
  defm Z128 : avx512_vptest<opc, OpcodeStr, sched.XMM, _.info128, NAME>,
              avx512_vptest_mb<opc, OpcodeStr, sched.XMM, _.info128>, EVEX_V128;
  }
}

multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr,
                            X86SchedWriteWidths sched> {
  defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", sched,
                                  avx512vl_i32_info>;
  defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", sched,
                                  avx512vl_i64_info>, VEX_W;
}

multiclass
avx512_vptest_wb<bits<8> opc, string OpcodeStr, 5735 X86SchedWriteWidths sched> { 5736 let Predicates = [HasBWI] in { 5737 defm WZ: avx512_vptest<opc, OpcodeStr#"w", sched.ZMM, 5738 v32i16_info, NAME#"W">, EVEX_V512, VEX_W; 5739 defm BZ: avx512_vptest<opc, OpcodeStr#"b", sched.ZMM, 5740 v64i8_info, NAME#"B">, EVEX_V512; 5741 } 5742 let Predicates = [HasVLX, HasBWI] in { 5743 5744 defm WZ256: avx512_vptest<opc, OpcodeStr#"w", sched.YMM, 5745 v16i16x_info, NAME#"W">, EVEX_V256, VEX_W; 5746 defm WZ128: avx512_vptest<opc, OpcodeStr#"w", sched.XMM, 5747 v8i16x_info, NAME#"W">, EVEX_V128, VEX_W; 5748 defm BZ256: avx512_vptest<opc, OpcodeStr#"b", sched.YMM, 5749 v32i8x_info, NAME#"B">, EVEX_V256; 5750 defm BZ128: avx512_vptest<opc, OpcodeStr#"b", sched.XMM, 5751 v16i8x_info, NAME#"B">, EVEX_V128; 5752 } 5753} 5754 5755multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr, 5756 X86SchedWriteWidths sched> : 5757 avx512_vptest_wb<opc_wb, OpcodeStr, sched>, 5758 avx512_vptest_dq<opc_dq, OpcodeStr, sched>; 5759 5760defm VPTESTM : avx512_vptest_all_forms<0x26, 0x27, "vptestm", 5761 SchedWriteVecLogic>, T8PD; 5762defm VPTESTNM : avx512_vptest_all_forms<0x26, 0x27, "vptestnm", 5763 SchedWriteVecLogic>, T8XS; 5764 5765//===----------------------------------------------------------------------===// 5766// AVX-512 Shift instructions 5767//===----------------------------------------------------------------------===// 5768 5769multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM, 5770 string OpcodeStr, SDNode OpNode, 5771 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 5772 let ExeDomain = _.ExeDomain in { 5773 defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst), 5774 (ins _.RC:$src1, u8imm:$src2), OpcodeStr, 5775 "$src2, $src1", "$src1, $src2", 5776 (_.VT (OpNode _.RC:$src1, (i8 timm:$src2)))>, 5777 Sched<[sched]>; 5778 defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst), 5779 (ins _.MemOp:$src1, u8imm:$src2), OpcodeStr, 5780 "$src2, $src1", "$src1, $src2", 5781 (_.VT (OpNode (_.VT (_.LdFrag addr:$src1)), 5782 (i8 timm:$src2)))>, 5783 Sched<[sched.Folded]>; 5784 } 5785} 5786 5787multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM, 5788 string OpcodeStr, SDNode OpNode, 5789 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 5790 let ExeDomain = _.ExeDomain in 5791 defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst), 5792 (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr, 5793 "$src2, ${src1}"##_.BroadcastStr, "${src1}"##_.BroadcastStr##", $src2", 5794 (_.VT (OpNode (_.BroadcastLdFrag addr:$src1), (i8 timm:$src2)))>, 5795 EVEX_B, Sched<[sched.Folded]>; 5796} 5797 5798multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode, 5799 X86FoldableSchedWrite sched, ValueType SrcVT, 5800 X86VectorVTInfo _> { 5801 // src2 is always 128-bit 5802 let ExeDomain = _.ExeDomain in { 5803 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 5804 (ins _.RC:$src1, VR128X:$src2), OpcodeStr, 5805 "$src2, $src1", "$src1, $src2", 5806 (_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2)))>, 5807 AVX512BIBase, EVEX_4V, Sched<[sched]>; 5808 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 5809 (ins _.RC:$src1, i128mem:$src2), OpcodeStr, 5810 "$src2, $src1", "$src1, $src2", 5811 (_.VT (OpNode _.RC:$src1, (SrcVT (load addr:$src2))))>, 5812 AVX512BIBase, 5813 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>; 5814 } 5815} 5816 5817multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode, 5818 
X86SchedWriteWidths sched, ValueType SrcVT, 5819 AVX512VLVectorVTInfo VTInfo, 5820 Predicate prd> { 5821 let Predicates = [prd] in 5822 defm Z : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.ZMM, SrcVT, 5823 VTInfo.info512>, EVEX_V512, 5824 EVEX_CD8<VTInfo.info512.EltSize, CD8VQ> ; 5825 let Predicates = [prd, HasVLX] in { 5826 defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.YMM, SrcVT, 5827 VTInfo.info256>, EVEX_V256, 5828 EVEX_CD8<VTInfo.info256.EltSize, CD8VH>; 5829 defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.XMM, SrcVT, 5830 VTInfo.info128>, EVEX_V128, 5831 EVEX_CD8<VTInfo.info128.EltSize, CD8VF>; 5832 } 5833} 5834 5835multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw, 5836 string OpcodeStr, SDNode OpNode, 5837 X86SchedWriteWidths sched, 5838 bit NotEVEX2VEXConvertibleQ = 0> { 5839 defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, sched, v4i32, 5840 avx512vl_i32_info, HasAVX512>; 5841 let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in 5842 defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, sched, v2i64, 5843 avx512vl_i64_info, HasAVX512>, VEX_W; 5844 defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, sched, v8i16, 5845 avx512vl_i16_info, HasBWI>; 5846} 5847 5848multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM, 5849 string OpcodeStr, SDNode OpNode, 5850 X86SchedWriteWidths sched, 5851 AVX512VLVectorVTInfo VTInfo> { 5852 let Predicates = [HasAVX512] in 5853 defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, 5854 sched.ZMM, VTInfo.info512>, 5855 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.ZMM, 5856 VTInfo.info512>, EVEX_V512; 5857 let Predicates = [HasAVX512, HasVLX] in { 5858 defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, 5859 sched.YMM, VTInfo.info256>, 5860 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.YMM, 5861 VTInfo.info256>, EVEX_V256; 5862 defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, 5863 sched.XMM, VTInfo.info128>, 5864 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.XMM, 5865 VTInfo.info128>, EVEX_V128; 5866 } 5867} 5868 5869multiclass avx512_shift_rmi_w<bits<8> opcw, Format ImmFormR, Format ImmFormM, 5870 string OpcodeStr, SDNode OpNode, 5871 X86SchedWriteWidths sched> { 5872 let Predicates = [HasBWI] in 5873 defm WZ: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode, 5874 sched.ZMM, v32i16_info>, EVEX_V512, VEX_WIG; 5875 let Predicates = [HasVLX, HasBWI] in { 5876 defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode, 5877 sched.YMM, v16i16x_info>, EVEX_V256, VEX_WIG; 5878 defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode, 5879 sched.XMM, v8i16x_info>, EVEX_V128, VEX_WIG; 5880 } 5881} 5882 5883multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq, 5884 Format ImmFormR, Format ImmFormM, 5885 string OpcodeStr, SDNode OpNode, 5886 X86SchedWriteWidths sched, 5887 bit NotEVEX2VEXConvertibleQ = 0> { 5888 defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode, 5889 sched, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; 5890 let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in 5891 defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode, 5892 sched, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W; 5893} 5894 5895defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli, 5896 SchedWriteVecShiftImm>, 5897 avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli, 
5898 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V; 5899 5900defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli, 5901 SchedWriteVecShiftImm>, 5902 avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli, 5903 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V; 5904 5905defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai, 5906 SchedWriteVecShiftImm, 1>, 5907 avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai, 5908 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V; 5909 5910defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri, 5911 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V; 5912defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli, 5913 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V; 5914 5915defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl, 5916 SchedWriteVecShift>; 5917defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra, 5918 SchedWriteVecShift, 1>; 5919defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl, 5920 SchedWriteVecShift>; 5921 5922// Use 512bit VPSRA/VPSRAI version to implement v2i64/v4i64 in case NoVLX. 5923let Predicates = [HasAVX512, NoVLX] in { 5924 def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))), 5925 (EXTRACT_SUBREG (v8i64 5926 (VPSRAQZrr 5927 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 5928 VR128X:$src2)), sub_ymm)>; 5929 5930 def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))), 5931 (EXTRACT_SUBREG (v8i64 5932 (VPSRAQZrr 5933 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 5934 VR128X:$src2)), sub_xmm)>; 5935 5936 def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 timm:$src2))), 5937 (EXTRACT_SUBREG (v8i64 5938 (VPSRAQZri 5939 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 5940 timm:$src2)), sub_ymm)>; 5941 5942 def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 timm:$src2))), 5943 (EXTRACT_SUBREG (v8i64 5944 (VPSRAQZri 5945 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 5946 timm:$src2)), sub_xmm)>; 5947} 5948 5949//===-------------------------------------------------------------------===// 5950// Variable Bit Shifts 5951//===-------------------------------------------------------------------===// 5952 5953multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode, 5954 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 5955 let ExeDomain = _.ExeDomain in { 5956 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 5957 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 5958 "$src2, $src1", "$src1, $src2", 5959 (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2)))>, 5960 AVX5128IBase, EVEX_4V, Sched<[sched]>; 5961 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 5962 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr, 5963 "$src2, $src1", "$src1, $src2", 5964 (_.VT (OpNode _.RC:$src1, 5965 (_.VT (_.LdFrag addr:$src2))))>, 5966 AVX5128IBase, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, 5967 Sched<[sched.Folded, sched.ReadAfterFold]>; 5968 } 5969} 5970 5971multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode, 5972 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 5973 let ExeDomain = _.ExeDomain in 5974 defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 5975 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr, 5976 "${src2}"##_.BroadcastStr##", $src1", 5977 "$src1, ${src2}"##_.BroadcastStr, 5978 (_.VT (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))>, 5979 
AVX5128IBase, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, 5980 Sched<[sched.Folded, sched.ReadAfterFold]>; 5981} 5982 5983multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode, 5984 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> { 5985 let Predicates = [HasAVX512] in 5986 defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, 5987 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512; 5988 5989 let Predicates = [HasAVX512, HasVLX] in { 5990 defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, 5991 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256; 5992 defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, 5993 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128; 5994 } 5995} 5996 5997multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr, 5998 SDNode OpNode, X86SchedWriteWidths sched> { 5999 defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode, sched, 6000 avx512vl_i32_info>; 6001 defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode, sched, 6002 avx512vl_i64_info>, VEX_W; 6003} 6004 6005// Use 512bit version to implement 128/256 bit in case NoVLX. 6006multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr, 6007 SDNode OpNode, list<Predicate> p> { 6008 let Predicates = p in { 6009 def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1), 6010 (_.info256.VT _.info256.RC:$src2))), 6011 (EXTRACT_SUBREG 6012 (!cast<Instruction>(OpcodeStr#"Zrr") 6013 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm), 6014 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)), 6015 sub_ymm)>; 6016 6017 def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1), 6018 (_.info128.VT _.info128.RC:$src2))), 6019 (EXTRACT_SUBREG 6020 (!cast<Instruction>(OpcodeStr#"Zrr") 6021 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm), 6022 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)), 6023 sub_xmm)>; 6024 } 6025} 6026multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr, 6027 SDNode OpNode, X86SchedWriteWidths sched> { 6028 let Predicates = [HasBWI] in 6029 defm WZ: avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v32i16_info>, 6030 EVEX_V512, VEX_W; 6031 let Predicates = [HasVLX, HasBWI] in { 6032 6033 defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v16i16x_info>, 6034 EVEX_V256, VEX_W; 6035 defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v8i16x_info>, 6036 EVEX_V128, VEX_W; 6037 } 6038} 6039 6040defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", X86vshlv, SchedWriteVarVecShift>, 6041 avx512_var_shift_w<0x12, "vpsllvw", X86vshlv, SchedWriteVarVecShift>; 6042 6043defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", X86vsrav, SchedWriteVarVecShift>, 6044 avx512_var_shift_w<0x11, "vpsravw", X86vsrav, SchedWriteVarVecShift>; 6045 6046defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", X86vsrlv, SchedWriteVarVecShift>, 6047 avx512_var_shift_w<0x10, "vpsrlvw", X86vsrlv, SchedWriteVarVecShift>; 6048 6049defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SchedWriteVarVecShift>; 6050defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SchedWriteVarVecShift>; 6051 6052defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", X86vsrav, [HasAVX512, NoVLX]>; 6053defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", X86vshlv, [HasBWI, NoVLX]>; 6054defm : 
avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", X86vsrav, [HasBWI, NoVLX]>; 6055defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", X86vsrlv, [HasBWI, NoVLX]>; 6056 6057 6058// Use 512bit VPROL/VPROLI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX. 6059let Predicates = [HasAVX512, NoVLX] in { 6060 def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))), 6061 (EXTRACT_SUBREG (v8i64 6062 (VPROLVQZrr 6063 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6064 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))), 6065 sub_xmm)>; 6066 def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))), 6067 (EXTRACT_SUBREG (v8i64 6068 (VPROLVQZrr 6069 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6070 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))), 6071 sub_ymm)>; 6072 6073 def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))), 6074 (EXTRACT_SUBREG (v16i32 6075 (VPROLVDZrr 6076 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6077 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))), 6078 sub_xmm)>; 6079 def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))), 6080 (EXTRACT_SUBREG (v16i32 6081 (VPROLVDZrr 6082 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6083 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))), 6084 sub_ymm)>; 6085 6086 def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 timm:$src2))), 6087 (EXTRACT_SUBREG (v8i64 6088 (VPROLQZri 6089 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6090 timm:$src2)), sub_xmm)>; 6091 def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 timm:$src2))), 6092 (EXTRACT_SUBREG (v8i64 6093 (VPROLQZri 6094 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6095 timm:$src2)), sub_ymm)>; 6096 6097 def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 timm:$src2))), 6098 (EXTRACT_SUBREG (v16i32 6099 (VPROLDZri 6100 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6101 timm:$src2)), sub_xmm)>; 6102 def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 timm:$src2))), 6103 (EXTRACT_SUBREG (v16i32 6104 (VPROLDZri 6105 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6106 timm:$src2)), sub_ymm)>; 6107} 6108 6109// Use 512bit VPROR/VPRORI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX. 
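// Note on why this widening is legal (same reasoning applies to the VPROL
// block above): the operands are inserted with INSERT_SUBREG into an
// IMPLICIT_DEF, so the upper lanes of the 512-bit rotate hold undefined
// values; that is fine because the final EXTRACT_SUBREG (e.g. sub_xmm of a
// VPRORVQZrr result for v2i64) reads only the low lanes.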
6110let Predicates = [HasAVX512, NoVLX] in { 6111 def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))), 6112 (EXTRACT_SUBREG (v8i64 6113 (VPRORVQZrr 6114 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6115 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))), 6116 sub_xmm)>; 6117 def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))), 6118 (EXTRACT_SUBREG (v8i64 6119 (VPRORVQZrr 6120 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6121 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))), 6122 sub_ymm)>; 6123 6124 def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))), 6125 (EXTRACT_SUBREG (v16i32 6126 (VPRORVDZrr 6127 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6128 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))), 6129 sub_xmm)>; 6130 def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))), 6131 (EXTRACT_SUBREG (v16i32 6132 (VPRORVDZrr 6133 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6134 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))), 6135 sub_ymm)>; 6136 6137 def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 timm:$src2))), 6138 (EXTRACT_SUBREG (v8i64 6139 (VPRORQZri 6140 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6141 timm:$src2)), sub_xmm)>; 6142 def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 timm:$src2))), 6143 (EXTRACT_SUBREG (v8i64 6144 (VPRORQZri 6145 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6146 timm:$src2)), sub_ymm)>; 6147 6148 def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 timm:$src2))), 6149 (EXTRACT_SUBREG (v16i32 6150 (VPRORDZri 6151 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6152 timm:$src2)), sub_xmm)>; 6153 def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 timm:$src2))), 6154 (EXTRACT_SUBREG (v16i32 6155 (VPRORDZri 6156 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6157 timm:$src2)), sub_ymm)>; 6158} 6159 6160//===-------------------------------------------------------------------===// 6161// 1-src variable permutation VPERMW/D/Q 6162//===-------------------------------------------------------------------===// 6163 6164multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode, 6165 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> { 6166 let Predicates = [HasAVX512] in 6167 defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>, 6168 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info512>, EVEX_V512; 6169 6170 let Predicates = [HasAVX512, HasVLX] in 6171 defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>, 6172 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info256>, EVEX_V256; 6173} 6174 6175multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM, 6176 string OpcodeStr, SDNode OpNode, 6177 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo> { 6178 let Predicates = [HasAVX512] in 6179 defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, 6180 sched, VTInfo.info512>, 6181 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, 6182 sched, VTInfo.info512>, EVEX_V512; 6183 let Predicates = [HasAVX512, HasVLX] in 6184 defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, 6185 sched, VTInfo.info256>, 6186 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, 6187 sched, VTInfo.info256>, EVEX_V256; 6188} 6189 6190multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr, 6191 
Predicate prd, SDNode OpNode, 6192 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> { 6193 let Predicates = [prd] in 6194 defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>, 6195 EVEX_V512 ; 6196 let Predicates = [HasVLX, prd] in { 6197 defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>, 6198 EVEX_V256 ; 6199 defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info128>, 6200 EVEX_V128 ; 6201 } 6202} 6203 6204defm VPERMW : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv, 6205 WriteVarShuffle256, avx512vl_i16_info>, VEX_W; 6206defm VPERMB : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv, 6207 WriteVarShuffle256, avx512vl_i8_info>; 6208 6209defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv, 6210 WriteVarShuffle256, avx512vl_i32_info>; 6211defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv, 6212 WriteVarShuffle256, avx512vl_i64_info>, VEX_W; 6213defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv, 6214 WriteFVarShuffle256, avx512vl_f32_info>; 6215defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv, 6216 WriteFVarShuffle256, avx512vl_f64_info>, VEX_W; 6217 6218defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq", 6219 X86VPermi, WriteShuffle256, avx512vl_i64_info>, 6220 EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W; 6221defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd", 6222 X86VPermi, WriteFShuffle256, avx512vl_f64_info>, 6223 EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W; 6224 6225//===----------------------------------------------------------------------===// 6226// AVX-512 - VPERMIL 6227//===----------------------------------------------------------------------===// 6228 6229multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode, 6230 X86FoldableSchedWrite sched, X86VectorVTInfo _, 6231 X86VectorVTInfo Ctrl> { 6232 defm rr: AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst), 6233 (ins _.RC:$src1, Ctrl.RC:$src2), OpcodeStr, 6234 "$src2, $src1", "$src1, $src2", 6235 (_.VT (OpNode _.RC:$src1, 6236 (Ctrl.VT Ctrl.RC:$src2)))>, 6237 T8PD, EVEX_4V, Sched<[sched]>; 6238 defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst), 6239 (ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr, 6240 "$src2, $src1", "$src1, $src2", 6241 (_.VT (OpNode 6242 _.RC:$src1, 6243 (Ctrl.VT (Ctrl.LdFrag addr:$src2))))>, 6244 T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, 6245 Sched<[sched.Folded, sched.ReadAfterFold]>; 6246 defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst), 6247 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr, 6248 "${src2}"##_.BroadcastStr##", $src1", 6249 "$src1, ${src2}"##_.BroadcastStr, 6250 (_.VT (OpNode 6251 _.RC:$src1, 6252 (Ctrl.VT (Ctrl.BroadcastLdFrag addr:$src2))))>, 6253 T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, 6254 Sched<[sched.Folded, sched.ReadAfterFold]>; 6255} 6256 6257multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar, 6258 X86SchedWriteWidths sched, 6259 AVX512VLVectorVTInfo _, 6260 AVX512VLVectorVTInfo Ctrl> { 6261 let Predicates = [HasAVX512] in { 6262 defm Z : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.ZMM, 6263 _.info512, Ctrl.info512>, EVEX_V512; 6264 } 6265 let Predicates = [HasAVX512, HasVLX] in { 6266 defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.XMM, 6267 _.info128, Ctrl.info128>, EVEX_V128; 6268 defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.YMM, 6269 _.info256, Ctrl.info256>, EVEX_V256; 6270 } 6271} 
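
// For example, the VPERMILPS defm below instantiates both halves of
// avx512_permil: the variable form comes from avx512_permil_vec_common
// (e.g. VPERMILPSZrr, which takes its lane selectors from an integer control
// vector), while the immediate form reuses avx512_shift_rmi_sizes
// (e.g. VPERMILPSZri), since immediate-controlled permutes share the
// reg/mem/broadcast-plus-imm shape of the immediate shifts above.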

multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
                         AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{
  defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, SchedWriteFVarShuffle,
                                      _, Ctrl>;
  defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
                                    X86VPermilpi, SchedWriteFShuffle, _>,
             EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
}

let ExeDomain = SSEPackedSingle in
defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info,
                               avx512vl_i32_info>;
let ExeDomain = SSEPackedDouble in
defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
                               avx512vl_i64_info>, VEX_W1X;

//===----------------------------------------------------------------------===//
// AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW
//===----------------------------------------------------------------------===//

defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
                                      X86PShufd, SchedWriteShuffle, avx512vl_i32_info>,
                                      EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;
defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
                                  X86PShufhw, SchedWriteShuffle>,
                                  EVEX, AVX512XSIi8Base;
defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
                                  X86PShuflw, SchedWriteShuffle>,
                                  EVEX, AVX512XDIi8Base;

//===----------------------------------------------------------------------===//
// AVX-512 - VPSHUFB
//===----------------------------------------------------------------------===//

multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86SchedWriteWidths sched> {
  let Predicates = [HasBWI] in
  defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v64i8_info>,
                           EVEX_V512;

  let Predicates = [HasVLX, HasBWI] in {
  defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v32i8x_info>,
                              EVEX_V256;
  defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v16i8x_info>,
                              EVEX_V128;
  }
}

defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb,
                                  SchedWriteVarShuffle>, VEX_WIG;

//===----------------------------------------------------------------------===//
// Move Low to High and High to Low packed FP Instructions
//===----------------------------------------------------------------------===//

def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
                            (ins VR128X:$src1, VR128X:$src2),
                            "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                            [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))]>,
                            Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V;
let isCommutable = 1 in
def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
                            (ins VR128X:$src1, VR128X:$src2),
                            "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                            [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))]>,
                            Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V, NotMemoryFoldable;

//===----------------------------------------------------------------------===//
// VMOVHPS/PD VMOVLPS Instructions
// All patterns were taken from the SSE implementation.
//===----------------------------------------------------------------------===//

multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr,
                                  SDPatternOperator OpNode,
                                  X86VectorVTInfo _> {
  let hasSideEffects = 0, mayLoad = 1, ExeDomain = _.ExeDomain in
  def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst),
                  (ins _.RC:$src1, f64mem:$src2),
                  !strconcat(OpcodeStr,
                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set _.RC:$dst,
                        (OpNode _.RC:$src1,
                                (_.VT (bitconvert
                                       (v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))]>,
                  Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>, EVEX_4V;
}

// No patterns for MOVLPS/MOVHPS as the Movlhps node should only be created in
// SSE1, and the MOVLPS pattern is even more complex.
defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", null_frag,
                                          v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl,
                                          v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", null_frag,
                                          v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movsd,
                                          v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;

let Predicates = [HasAVX512] in {
  // VMOVHPD patterns
  def : Pat<(v2f64 (X86Unpckl VR128X:$src1,
                    (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))),
            (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(v2f64 (X86Unpckl VR128X:$src1, (X86vzload64 addr:$src2))),
            (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;

  // VMOVLPD patterns
  def : Pat<(v2f64 (X86Movsd VR128X:$src1, (X86vzload64 addr:$src2))),
            (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
}

let SchedRW = [WriteFStore] in {
let mayStore = 1, hasSideEffects = 0 in
def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs),
                              (ins f64mem:$dst, VR128X:$src),
                              "vmovhps\t{$src, $dst|$dst, $src}",
                              []>, EVEX, EVEX_CD8<32, CD8VT2>;
def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
                              (ins f64mem:$dst, VR128X:$src),
                              "vmovhpd\t{$src, $dst|$dst, $src}",
                              [(store (f64 (extractelt
                                            (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)),
                                            (iPTR 0))), addr:$dst)]>,
                              EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
let mayStore = 1, hasSideEffects = 0 in
def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs),
                              (ins f64mem:$dst, VR128X:$src),
                              "vmovlps\t{$src, $dst|$dst, $src}",
                              []>, EVEX, EVEX_CD8<32, CD8VT2>;
def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs),
                              (ins f64mem:$dst, VR128X:$src),
                              "vmovlpd\t{$src, $dst|$dst, $src}",
                              [(store (f64 (extractelt (v2f64 VR128X:$src),
                                            (iPTR 0))), addr:$dst)]>,
                              EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
} // SchedRW

let Predicates = [HasAVX512] in {
  // VMOVHPD patterns
  def : Pat<(store (f64 (extractelt
                         (v2f64 (X86VPermilpi VR128X:$src, (i8 1))),
                         (iPTR 0))), addr:$dst),
            (VMOVHPDZ128mr addr:$dst, VR128X:$src)>;
}
//===----------------------------------------------------------------------===//
// FMA - Fused Multiply Operations
//

multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86FoldableSchedWrite sched,
                               X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
      Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm r:
AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), 6427 (ins _.RC:$src2, _.RC:$src3), 6428 OpcodeStr, "$src3, $src2", "$src2, $src3", 6429 (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>, 6430 AVX512FMA3Base, Sched<[sched]>; 6431 6432 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 6433 (ins _.RC:$src2, _.MemOp:$src3), 6434 OpcodeStr, "$src3, $src2", "$src2, $src3", 6435 (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>, 6436 AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>; 6437 6438 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 6439 (ins _.RC:$src2, _.ScalarMemOp:$src3), 6440 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), 6441 !strconcat("$src2, ${src3}", _.BroadcastStr ), 6442 (OpNode _.RC:$src2, 6443 _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))), 1, 0>, 6444 AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 6445 } 6446} 6447 6448multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode, 6449 X86FoldableSchedWrite sched, 6450 X86VectorVTInfo _, string Suff> { 6451 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0, 6452 Uses = [MXCSR] in 6453 defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), 6454 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc), 6455 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", 6456 (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))), 1, 1>, 6457 AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>; 6458} 6459 6460multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDNode OpNode, 6461 SDNode OpNodeRnd, X86SchedWriteWidths sched, 6462 AVX512VLVectorVTInfo _, string Suff> { 6463 let Predicates = [HasAVX512] in { 6464 defm Z : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.ZMM, 6465 _.info512, Suff>, 6466 avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM, 6467 _.info512, Suff>, 6468 EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>; 6469 } 6470 let Predicates = [HasVLX, HasAVX512] in { 6471 defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.YMM, 6472 _.info256, Suff>, 6473 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>; 6474 defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.XMM, 6475 _.info128, Suff>, 6476 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>; 6477 } 6478} 6479 6480multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDNode OpNode, 6481 SDNode OpNodeRnd> { 6482 defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd, 6483 SchedWriteFMA, avx512vl_f32_info, "PS">; 6484 defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd, 6485 SchedWriteFMA, avx512vl_f64_info, "PD">, 6486 VEX_W; 6487} 6488 6489defm VFMADD213 : avx512_fma3p_213_f<0xA8, "vfmadd213", X86any_Fmadd, X86FmaddRnd>; 6490defm VFMSUB213 : avx512_fma3p_213_f<0xAA, "vfmsub213", X86Fmsub, X86FmsubRnd>; 6491defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub, X86FmaddsubRnd>; 6492defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd, X86FmsubaddRnd>; 6493defm VFNMADD213 : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86Fnmadd, X86FnmaddRnd>; 6494defm VFNMSUB213 : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86Fnmsub, X86FnmsubRnd>; 6495 6496 6497multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, 6498 X86FoldableSchedWrite sched, 6499 X86VectorVTInfo _, string Suff> { 6500 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0, 6501 Uses = [MXCSR], 
mayRaiseFPException = 1 in { 6502 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), 6503 (ins _.RC:$src2, _.RC:$src3), 6504 OpcodeStr, "$src3, $src2", "$src2, $src3", 6505 (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1, 6506 vselect, 1>, AVX512FMA3Base, Sched<[sched]>; 6507 6508 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 6509 (ins _.RC:$src2, _.MemOp:$src3), 6510 OpcodeStr, "$src3, $src2", "$src2, $src3", 6511 (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>, 6512 AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>; 6513 6514 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 6515 (ins _.RC:$src2, _.ScalarMemOp:$src3), 6516 OpcodeStr, "${src3}"##_.BroadcastStr##", $src2", 6517 "$src2, ${src3}"##_.BroadcastStr, 6518 (_.VT (OpNode _.RC:$src2, 6519 (_.VT (_.BroadcastLdFrag addr:$src3)), 6520 _.RC:$src1)), 1, 0>, AVX512FMA3Base, EVEX_B, 6521 Sched<[sched.Folded, sched.ReadAfterFold]>; 6522 } 6523} 6524 6525multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode, 6526 X86FoldableSchedWrite sched, 6527 X86VectorVTInfo _, string Suff> { 6528 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0, 6529 Uses = [MXCSR] in 6530 defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), 6531 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc), 6532 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", 6533 (_.VT ( OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc))), 6534 1, 1, vselect, 1>, 6535 AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>; 6536} 6537 6538multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDNode OpNode, 6539 SDNode OpNodeRnd, X86SchedWriteWidths sched, 6540 AVX512VLVectorVTInfo _, string Suff> { 6541 let Predicates = [HasAVX512] in { 6542 defm Z : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.ZMM, 6543 _.info512, Suff>, 6544 avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM, 6545 _.info512, Suff>, 6546 EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>; 6547 } 6548 let Predicates = [HasVLX, HasAVX512] in { 6549 defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.YMM, 6550 _.info256, Suff>, 6551 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>; 6552 defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.XMM, 6553 _.info128, Suff>, 6554 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>; 6555 } 6556} 6557 6558multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDNode OpNode, 6559 SDNode OpNodeRnd > { 6560 defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd, 6561 SchedWriteFMA, avx512vl_f32_info, "PS">; 6562 defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd, 6563 SchedWriteFMA, avx512vl_f64_info, "PD">, 6564 VEX_W; 6565} 6566 6567defm VFMADD231 : avx512_fma3p_231_f<0xB8, "vfmadd231", X86any_Fmadd, X86FmaddRnd>; 6568defm VFMSUB231 : avx512_fma3p_231_f<0xBA, "vfmsub231", X86Fmsub, X86FmsubRnd>; 6569defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub, X86FmaddsubRnd>; 6570defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd, X86FmsubaddRnd>; 6571defm VFNMADD231 : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86Fnmadd, X86FnmaddRnd>; 6572defm VFNMSUB231 : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86Fnmsub, X86FnmsubRnd>; 6573 6574multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, 6575 X86FoldableSchedWrite sched, 6576 X86VectorVTInfo _, string Suff> { 6577 let Constraints = "$src1 = $dst", ExeDomain = 
_.ExeDomain, hasSideEffects = 0,
      Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1, vselect, 1>,
          AVX512FMA3Base, Sched<[sched]>;

  // The pattern is in 312 order so that the load is in a different place
  // from the 213 and 231 patterns; this helps tablegen's duplicate pattern
  // detection.
  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>,
          AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;

  // The pattern is in 312 order so that the load is in a different place
  // from the 213 and 231 patterns; this helps tablegen's duplicate pattern
  // detection.
  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.ScalarMemOp:$src3),
          OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
          "$src2, ${src3}"##_.BroadcastStr,
          (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
                        _.RC:$src1, _.RC:$src2)), 1, 0>,
          AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched,
                                 X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
      Uses = [MXCSR] in
  defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
          (_.VT ( OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 timm:$rc))),
          1, 1, vselect, 1>,
          AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
}

multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   SDNode OpNodeRnd, X86SchedWriteWidths sched,
                                   AVX512VLVectorVTInfo _, string Suff> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.ZMM,
                                 _.info512, Suff>,
             avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
                                   _.info512, Suff>,
             EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.YMM,
                                    _.info256, Suff>,
                EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.XMM,
                                    _.info128, Suff>,
                EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}

multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDNode OpNodeRnd > {
  defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
                                    SchedWriteFMA, avx512vl_f32_info, "PS">;
  defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
                                    SchedWriteFMA, avx512vl_f64_info, "PD">,
                                    VEX_W;
}

defm VFMADD132 : avx512_fma3p_132_f<0x98, "vfmadd132", X86any_Fmadd, X86FmaddRnd>;
defm VFMSUB132 : avx512_fma3p_132_f<0x9A, "vfmsub132", X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd, X86FmsubaddRnd>;
// Scalar FMA
multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                               dag RHS_r, dag RHS_m, dag RHS_b, bit MaskOnlyReg> {
let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
  defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3), OpcodeStr,
          "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
          AVX512FMA3Base, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC;

  let mayLoad = 1 in
  defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
          "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
          AVX512FMA3Base, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC;

  let Uses = [MXCSR] in
  defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (null_frag), 1, 1>,
          AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>;

  let isCodeGenOnly = 1, isCommutable = 1 in {
    def r : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
                        (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
                        !strconcat(OpcodeStr,
                                   "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                        !if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC;
    def m : AVX512FMA3S<opc, MRMSrcMem, (outs _.FRC:$dst),
                        (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
                        !strconcat(OpcodeStr,
                                   "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                        [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC;

    let Uses = [MXCSR] in
    def rb : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3, AVX512RC:$rc),
                         !strconcat(OpcodeStr,
                                    "\t{$rc, $src3, $src2, $dst|$dst, $src2, $src3, $rc}"),
                         !if(MaskOnlyReg, [], [RHS_b])>, EVEX_B, EVEX_RC,
                         Sched<[SchedWriteFMA.Scl]>;
  }// isCodeGenOnly = 1
}// Constraints = "$src1 = $dst"
}

multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
                            string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd,
                            X86VectorVTInfo _, string SUFF> {
  let ExeDomain = _.ExeDomain in {
  defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix, _,
                // Operands for intrinsic are in 123 order to preserve passthru
                // semantics.
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
                                           _.FRC:$src3))),
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
                                           (_.ScalarLdFrag addr:$src3)))),
                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src1,
                                           _.FRC:$src3, (i32 timm:$rc)))), 0>;

  defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix, _,
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
                                           _.FRC:$src1))),
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
                                           (_.ScalarLdFrag addr:$src3), _.FRC:$src1))),
                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src3,
                                           _.FRC:$src1, (i32 timm:$rc)))), 1>;
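  // Note: MaskOnlyReg is 1 for the 231 and 132 forms, so their register and
  // rounding-control variants carry no selection pattern; the 213 register
  // form is the one matched directly, while the memory forms of all three
  // still fold loads via their patterns.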
  // One pattern is 312 order so that the load is in a different place from the
  // 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
  defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix, _,
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3,
                                           _.FRC:$src2))),
                (set _.FRC:$dst, (_.EltVT (OpNode (_.ScalarLdFrag addr:$src3),
                                           _.FRC:$src1, _.FRC:$src2))),
                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src1, _.FRC:$src3,
                                           _.FRC:$src2, (i32 timm:$rc)))), 1>;
  }
}

multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
                        string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd> {
  let Predicates = [HasAVX512] in {
    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
                                 OpNodeRnd, f32x_info, "SS">,
                                 EVEX_CD8<32, CD8VT1>, VEX_LIG;
    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
                                 OpNodeRnd, f64x_info, "SD">,
                                 EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
  }
}

defm VFMADD  : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", X86any_Fmadd, X86FmaddRnd>;
defm VFMSUB  : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86Fmsub, X86FmsubRnd>;
defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86Fnmsub, X86FnmsubRnd>;

multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix,
                                      string Suffix, SDNode Move,
                                      X86VectorVTInfo _, PatLeaf ZeroFP> {
  let Predicates = [HasAVX512] in {
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op _.FRC:$src2,
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                    _.FRC:$src3))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zr_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op _.FRC:$src2, _.FRC:$src3,
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zr_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op _.FRC:$src2,
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                    (_.ScalarLdFrag addr:$src3)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zm_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                    (_.ScalarLdFrag addr:$src3), _.FRC:$src2))))),
              (!cast<I>(Prefix#"132"#Suffix#"Zm_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zm_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects VK1WM:$mask,
                 (Op _.FRC:$src2,
                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                     _.FRC:$src3),
                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zr_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects VK1WM:$mask,
                 (Op _.FRC:$src2,
                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                     (_.ScalarLdFrag addr:$src3)),
                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zm_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects VK1WM:$mask,
                 (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                     (_.ScalarLdFrag addr:$src3), _.FRC:$src2),
                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"132"#Suffix#"Zm_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects VK1WM:$mask,
                 (Op _.FRC:$src2, _.FRC:$src3,
                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zr_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects VK1WM:$mask,
                 (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zm_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects VK1WM:$mask,
                 (Op _.FRC:$src2,
                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                     _.FRC:$src3),
                 (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zr_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects VK1WM:$mask,
                 (Op _.FRC:$src2, _.FRC:$src3,
                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
                 (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zr_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects VK1WM:$mask,
                 (Op _.FRC:$src2,
                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                     (_.ScalarLdFrag addr:$src3)),
                 (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zm_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects VK1WM:$mask,
                 (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                     _.FRC:$src2, (_.ScalarLdFrag addr:$src3)),
                 (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"132"#Suffix#"Zm_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects VK1WM:$mask,
                 (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
                 (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zm_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    // Patterns with rounding mode.
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (RndOp _.FRC:$src2,
                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                       _.FRC:$src3, (i32 timm:$rc)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (RndOp _.FRC:$src2, _.FRC:$src3,
                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                       (i32 timm:$rc)))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects VK1WM:$mask,
                 (RndOp _.FRC:$src2,
                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                        _.FRC:$src3, (i32 timm:$rc)),
                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects VK1WM:$mask,
                 (RndOp _.FRC:$src2, _.FRC:$src3,
                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                        (i32 timm:$rc)),
                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects VK1WM:$mask,
                 (RndOp _.FRC:$src2,
                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                        _.FRC:$src3, (i32 timm:$rc)),
                 (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects VK1WM:$mask,
                 (RndOp _.FRC:$src2, _.FRC:$src3,
                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                        (i32 timm:$rc)),
                 (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
  }
}

defm : avx512_scalar_fma_patterns<X86any_Fmadd, X86FmaddRnd, "VFMADD", "SS",
                                  X86Movss, v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<X86Fmsub, X86FmsubRnd, "VFMSUB", "SS",
                                  X86Movss, v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SS",
                                  X86Movss, v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SS",
                                  X86Movss, v4f32x_info, fp32imm0>;

defm : avx512_scalar_fma_patterns<X86any_Fmadd, X86FmaddRnd, "VFMADD", "SD",
                                  X86Movsd, v2f64x_info, fp64imm0>;
defm : avx512_scalar_fma_patterns<X86Fmsub, X86FmsubRnd, "VFMSUB", "SD",
                                  X86Movsd, v2f64x_info, fp64imm0>;
defm : avx512_scalar_fma_patterns<X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SD",
                                  X86Movsd, v2f64x_info, fp64imm0>;
defm : avx512_scalar_fma_patterns<X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SD",
                                  X86Movsd, v2f64x_info, fp64imm0>;
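// These patterns select the _Int forms whenever the FMA result is merged into
// the low element of an existing XMM value (the X86Movss/X86Movsd wrapper),
// so no extra scalar-to-vector moves are emitted around the FMA.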
"VFMSUB", "SD", 6963 X86Movsd, v2f64x_info, fp64imm0>; 6964defm : avx512_scalar_fma_patterns<X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SD", 6965 X86Movsd, v2f64x_info, fp64imm0>; 6966defm : avx512_scalar_fma_patterns<X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SD", 6967 X86Movsd, v2f64x_info, fp64imm0>; 6968 6969//===----------------------------------------------------------------------===// 6970// AVX-512 Packed Multiply of Unsigned 52-bit Integers and Add the Low 52-bit IFMA 6971//===----------------------------------------------------------------------===// 6972let Constraints = "$src1 = $dst" in { 6973multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, 6974 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 6975 // NOTE: The SDNode have the multiply operands first with the add last. 6976 // This enables commuted load patterns to be autogenerated by tablegen. 6977 let ExeDomain = _.ExeDomain in { 6978 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), 6979 (ins _.RC:$src2, _.RC:$src3), 6980 OpcodeStr, "$src3, $src2", "$src2, $src3", 6981 (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>, 6982 AVX512FMA3Base, Sched<[sched]>; 6983 6984 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 6985 (ins _.RC:$src2, _.MemOp:$src3), 6986 OpcodeStr, "$src3, $src2", "$src2, $src3", 6987 (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>, 6988 AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>; 6989 6990 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 6991 (ins _.RC:$src2, _.ScalarMemOp:$src3), 6992 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), 6993 !strconcat("$src2, ${src3}", _.BroadcastStr ), 6994 (OpNode _.RC:$src2, 6995 (_.VT (_.BroadcastLdFrag addr:$src3)), 6996 _.RC:$src1)>, 6997 AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 6998 } 6999} 7000} // Constraints = "$src1 = $dst" 7001 7002multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode, 7003 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> { 7004 let Predicates = [HasIFMA] in { 7005 defm Z : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, 7006 EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>; 7007 } 7008 let Predicates = [HasVLX, HasIFMA] in { 7009 defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, 7010 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>; 7011 defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, 7012 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>; 7013 } 7014} 7015 7016defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l, 7017 SchedWriteVecIMul, avx512vl_i64_info>, 7018 VEX_W; 7019defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h, 7020 SchedWriteVecIMul, avx512vl_i64_info>, 7021 VEX_W; 7022 7023//===----------------------------------------------------------------------===// 7024// AVX-512 Scalar convert from sign integer to float/double 7025//===----------------------------------------------------------------------===// 7026 7027multiclass avx512_vcvtsi<bits<8> opc, SDPatternOperator OpNode, X86FoldableSchedWrite sched, 7028 RegisterClass SrcRC, X86VectorVTInfo DstVT, 7029 X86MemOperand x86memop, PatFrag ld_frag, string asm, 7030 string mem, list<Register> _Uses = [MXCSR], 7031 bit _mayRaiseFPException = 1> { 7032let ExeDomain = DstVT.ExeDomain, Uses = _Uses, 7033 mayRaiseFPException = _mayRaiseFPException in { 7034 let hasSideEffects = 0, isCodeGenOnly = 1 in { 7035 def rr : 
//===----------------------------------------------------------------------===//
// AVX-512  Scalar convert from signed integer to float/double
//===----------------------------------------------------------------------===//

multiclass avx512_vcvtsi<bits<8> opc, SDPatternOperator OpNode, X86FoldableSchedWrite sched,
                         RegisterClass SrcRC, X86VectorVTInfo DstVT,
                         X86MemOperand x86memop, PatFrag ld_frag, string asm,
                         string mem, list<Register> _Uses = [MXCSR],
                         bit _mayRaiseFPException = 1> {
let ExeDomain = DstVT.ExeDomain, Uses = _Uses,
    mayRaiseFPException = _mayRaiseFPException in {
  let hasSideEffects = 0, isCodeGenOnly = 1 in {
    def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
                (ins DstVT.FRC:$src1, SrcRC:$src),
                !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
                EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
    let mayLoad = 1 in
      def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
                  (ins DstVT.FRC:$src1, x86memop:$src),
                  asm#"{"#mem#"}\t{$src, $src1, $dst|$dst, $src1, $src}", []>,
                  EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  } // hasSideEffects = 0
  def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
                  (ins DstVT.RC:$src1, SrcRC:$src2),
                  !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set DstVT.RC:$dst,
                        (OpNode (DstVT.VT DstVT.RC:$src1), SrcRC:$src2))]>,
                  EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;

  def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
                  (ins DstVT.RC:$src1, x86memop:$src2),
                  asm#"{"#mem#"}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  [(set DstVT.RC:$dst,
                        (OpNode (DstVT.VT DstVT.RC:$src1),
                                (ld_frag addr:$src2)))]>,
                  EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
  def : InstAlias<"v"#asm#mem#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  (!cast<Instruction>(NAME#"rr_Int") DstVT.RC:$dst,
                   DstVT.RC:$src1, SrcRC:$src2), 0, "att">;
}

multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode,
                               X86FoldableSchedWrite sched, RegisterClass SrcRC,
                               X86VectorVTInfo DstVT, string asm,
                               string mem> {
  let ExeDomain = DstVT.ExeDomain, Uses = [MXCSR] in
  def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
                   (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
                   !strconcat(asm,
                              "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"),
                   [(set DstVT.RC:$dst,
                         (OpNode (DstVT.VT DstVT.RC:$src1),
                                 SrcRC:$src2,
                                 (i32 timm:$rc)))]>,
                   EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
  def : InstAlias<"v"#asm#mem#"\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}",
                  (!cast<Instruction>(NAME#"rrb_Int") DstVT.RC:$dst,
                   DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc), 0, "att">;
}

multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode, SDNode OpNodeRnd,
                                X86FoldableSchedWrite sched,
                                RegisterClass SrcRC, X86VectorVTInfo DstVT,
                                X86MemOperand x86memop, PatFrag ld_frag,
                                string asm, string mem> {
  defm NAME : avx512_vcvtsi_round<opc, OpNodeRnd, sched, SrcRC, DstVT, asm, mem>,
              avx512_vcvtsi<opc, OpNode, sched, SrcRC, DstVT, x86memop,
                            ld_frag, asm, mem>, VEX_LIG;
}

let Predicates = [HasAVX512] in {
defm VCVTSI2SSZ  : avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
                                        WriteCvtI2SS, GR32,
                                        v4f32x_info, i32mem, loadi32, "cvtsi2ss", "l">,
                                        XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
                                        WriteCvtI2SS, GR64,
                                        v4f32x_info, i64mem, loadi64, "cvtsi2ss", "q">,
                                        XS, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTSI2SDZ  : avx512_vcvtsi<0x2A, null_frag, WriteCvtI2SD, GR32,
                                 v2f64x_info, i32mem, loadi32, "cvtsi2sd", "l", [], 0>,
                                 XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
                                        WriteCvtI2SD, GR64,
                                        v2f64x_info, i64mem, loadi64, "cvtsi2sd", "q">,
                                        XD, VEX_W, EVEX_CD8<64, CD8VT1>;

def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
                (VCVTSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
                (VCVTSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
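// Note that i32 -> f64 is exact (f64's 53-bit significand holds any 32-bit
// integer), which is why VCVTSI2SDZ above is defined with null_frag, no MXCSR
// use, and no FP-exception or rounding-control variants.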
def : Pat<(f32 (any_sint_to_fp (loadi32 addr:$src))),
          (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f32 (any_sint_to_fp (loadi64 addr:$src))),
          (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (any_sint_to_fp (loadi32 addr:$src))),
          (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (any_sint_to_fp (loadi64 addr:$src))),
          (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;

def : Pat<(f32 (any_sint_to_fp GR32:$src)),
          (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f32 (any_sint_to_fp GR64:$src)),
          (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
def : Pat<(f64 (any_sint_to_fp GR32:$src)),
          (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f64 (any_sint_to_fp GR64:$src)),
          (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;

defm VCVTUSI2SSZ   : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
                                          WriteCvtI2SS, GR32,
                                          v4f32x_info, i32mem, loadi32,
                                          "cvtusi2ss", "l">, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
                                          WriteCvtI2SS, GR64,
                                          v4f32x_info, i64mem, loadi64, "cvtusi2ss", "q">,
                                          XS, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTUSI2SDZ   : avx512_vcvtsi<0x7B, null_frag, WriteCvtI2SD, GR32, v2f64x_info,
                                   i32mem, loadi32, "cvtusi2sd", "l", [], 0>,
                                   XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
                                          WriteCvtI2SD, GR64,
                                          v2f64x_info, i64mem, loadi64, "cvtusi2sd", "q">,
                                          XD, VEX_W, EVEX_CD8<64, CD8VT1>;

def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
                (VCVTUSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
                (VCVTUSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;

def : Pat<(f32 (any_uint_to_fp (loadi32 addr:$src))),
          (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f32 (any_uint_to_fp (loadi64 addr:$src))),
          (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (any_uint_to_fp (loadi32 addr:$src))),
          (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (any_uint_to_fp (loadi64 addr:$src))),
          (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;

def : Pat<(f32 (any_uint_to_fp GR32:$src)),
          (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f32 (any_uint_to_fp GR64:$src)),
          (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
def : Pat<(f64 (any_uint_to_fp GR32:$src)),
          (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f64 (any_uint_to_fp GR64:$src)),
          (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
}
//===----------------------------------------------------------------------===//
// AVX-512  Scalar convert from float/double to integer
//===----------------------------------------------------------------------===//

multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
                                  X86VectorVTInfo DstVT, SDNode OpNode,
                                  SDNode OpNodeRnd,
                                  X86FoldableSchedWrite sched, string asm,
                                  string aliasStr> {
  let Predicates = [HasAVX512], ExeDomain = SrcVT.ExeDomain in {
    def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
                    !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                    [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src)))]>,
                    EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
    let Uses = [MXCSR] in
    def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc),
                     !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
                     [(set DstVT.RC:$dst, (OpNodeRnd (SrcVT.VT SrcVT.RC:$src), (i32 timm:$rc)))]>,
                     EVEX, VEX_LIG, EVEX_B, EVEX_RC,
                     Sched<[sched]>;
    def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src),
                    !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                    [(set DstVT.RC:$dst, (OpNode
                          (SrcVT.VT SrcVT.ScalarIntMemCPat:$src)))]>,
                    EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  } // Predicates = [HasAVX512]

  def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0, "att">;
  def : InstAlias<"v" # asm # aliasStr # "\t{$rc, $src, $dst|$dst, $src, $rc}",
                  (!cast<Instruction>(NAME # "rrb_Int") DstVT.RC:$dst, SrcVT.RC:$src, AVX512RC:$rc), 0, "att">;
  def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "rm_Int") DstVT.RC:$dst,
                   SrcVT.IntScalarMemOp:$src), 0, "att">;
}

// Convert float/double to signed/unsigned int 32/64
defm VCVTSS2SIZ:    avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info, X86cvts2si,
                                           X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{l}">,
                                           XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2SI64Z:  avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info, X86cvts2si,
                                           X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{q}">,
                                           XS, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2USIZ:   avx512_cvt_s_int_round<0x79, f32x_info, i32x_info, X86cvts2usi,
                                           X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{l}">,
                                           XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, f32x_info, i64x_info, X86cvts2usi,
                                           X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{q}">,
                                           XS, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTSD2SIZ:    avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info, X86cvts2si,
                                           X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{l}">,
                                           XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2SI64Z:  avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info, X86cvts2si,
                                           X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{q}">,
                                           XD, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2USIZ:   avx512_cvt_s_int_round<0x79, f64x_info, i32x_info, X86cvts2usi,
                                           X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{l}">,
                                           XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info, X86cvts2usi,
                                           X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{q}">,
                                           XD, VEX_W, EVEX_CD8<64, CD8VT1>;

// Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang
// which produce unnecessary vmovs{s,d} instructions.
let Predicates = [HasAVX512] in {
def : Pat<(v4f32 (X86Movss
                  (v4f32 VR128X:$dst),
                  (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR64:$src)))))),
          (VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v4f32 (X86Movss
                  (v4f32 VR128X:$dst),
                  (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi64 addr:$src))))))),
          (VCVTSI642SSZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v4f32 (X86Movss
                  (v4f32 VR128X:$dst),
                  (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR32:$src)))))),
          (VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
def : Pat<(v4f32 (X86Movss
                  (v4f32 VR128X:$dst),
                  (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi32 addr:$src))))))),
          (VCVTSI2SSZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                  (v2f64 VR128X:$dst),
                  (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR64:$src)))))),
          (VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v2f64 (X86Movsd
                  (v2f64 VR128X:$dst),
                  (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi64 addr:$src))))))),
          (VCVTSI642SDZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                  (v2f64 VR128X:$dst),
                  (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR32:$src)))))),
          (VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v2f64 (X86Movsd
                  (v2f64 VR128X:$dst),
                  (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi32 addr:$src))))))),
          (VCVTSI2SDZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v4f32 (X86Movss
                  (v4f32 VR128X:$dst),
                  (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR64:$src)))))),
          (VCVTUSI642SSZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v4f32 (X86Movss
                  (v4f32 VR128X:$dst),
                  (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi64 addr:$src))))))),
          (VCVTUSI642SSZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v4f32 (X86Movss
                  (v4f32 VR128X:$dst),
                  (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR32:$src)))))),
          (VCVTUSI2SSZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v4f32 (X86Movss
                  (v4f32 VR128X:$dst),
                  (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi32 addr:$src))))))),
          (VCVTUSI2SSZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                  (v2f64 VR128X:$dst),
                  (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR64:$src)))))),
          (VCVTUSI642SDZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v2f64 (X86Movsd
                  (v2f64 VR128X:$dst),
                  (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi64 addr:$src))))))),
          (VCVTUSI642SDZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                  (v2f64 VR128X:$dst),
                  (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR32:$src)))))),
          (VCVTUSI2SDZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v2f64 (X86Movsd
                  (v2f64 VR128X:$dst),
                  (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi32 addr:$src))))))),
          (VCVTUSI2SDZrm_Int VR128X:$dst, addr:$src)>;
} // Predicates = [HasAVX512]

// Convert float/double to signed/unsigned int 32/64 with truncation
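// (The vcvtt* forms always round toward zero, independent of MXCSR.RC; their
// rrb_Int variants only add {sae} to suppress exception reporting, not a
// rounding-mode override.)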
multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
                            X86VectorVTInfo _DstRC, SDNode OpNode,
                            SDNode OpNodeInt, SDNode OpNodeSAE,
                            X86FoldableSchedWrite sched, string aliasStr>{
let Predicates = [HasAVX512], ExeDomain = _SrcRC.ExeDomain in {
  let isCodeGenOnly = 1 in {
  def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
                  !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                  [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>,
                  EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
  def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
                  !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                  [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>,
                  EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  }

  def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
                      !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                      [(set _DstRC.RC:$dst, (OpNodeInt (_SrcRC.VT _SrcRC.RC:$src)))]>,
                      EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
  let Uses = [MXCSR] in
  def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
                       !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
                       [(set _DstRC.RC:$dst, (OpNodeSAE (_SrcRC.VT _SrcRC.RC:$src)))]>,
                       EVEX, VEX_LIG, EVEX_B, Sched<[sched]>;
  def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
                      (ins _SrcRC.IntScalarMemOp:$src),
                      !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                      [(set _DstRC.RC:$dst,
                            (OpNodeInt (_SrcRC.VT _SrcRC.ScalarIntMemCPat:$src)))]>,
                      EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
} //HasAVX512

  def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
  def : InstAlias<asm # aliasStr # "\t{{sae}, $src, $dst|$dst, $src, {sae}}",
                  (!cast<Instruction>(NAME # "rrb_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
  def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "rm_Int") _DstRC.RC:$dst,
                   _SrcRC.IntScalarMemOp:$src), 0, "att">;
}

defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
                        "{l}">, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
                        "{q}">, VEX_W, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
                        "{l}">, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
                        "{q}">, VEX_W, XD, EVEX_CD8<64, CD8VT1>;

defm VCVTTSS2USIZ: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i32x_info,
                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
                        "{l}">, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2USI64Z: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i64x_info,
                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
                        "{q}">, XS, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTTSD2USIZ: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i32x_info,
                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
                        "{l}">, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i64x_info,
                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
                        "{q}">, XD, VEX_W, EVEX_CD8<64, CD8VT1>;

//===----------------------------------------------------------------------===//
// AVX-512  Convert from float to double and back
//===----------------------------------------------------------------------===//
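// The narrowing direction (sd2ss) rounds, so it gets rounding-control
// variants; the widening direction (ss2sd) is exact, so it only gets an
// {sae} variant to suppress exception reporting (e.g. for SNaN inputs).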
let Uses = [MXCSR], mayRaiseFPException = 1 in
multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                X86VectorVTInfo _Src, SDNode OpNode,
                                X86FoldableSchedWrite sched> {
  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (_.VT (OpNode (_.VT _.RC:$src1),
                                       (_Src.VT _Src.RC:$src2)))>,
                         EVEX_4V, VEX_LIG, Sched<[sched]>;
  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (_.VT (OpNode (_.VT _.RC:$src1),
                                       (_Src.VT _Src.ScalarIntMemCPat:$src2)))>,
                         EVEX_4V, VEX_LIG,
                         Sched<[sched.Folded, sched.ReadAfterFold]>;

  let isCodeGenOnly = 1, hasSideEffects = 0 in {
    def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
               (ins _.FRC:$src1, _Src.FRC:$src2),
               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
               EVEX_4V, VEX_LIG, Sched<[sched]>;
    let mayLoad = 1 in
    def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
               (ins _.FRC:$src1, _Src.ScalarMemOp:$src2),
               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
               EVEX_4V, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Scalar Conversion with SAE - suppress all exceptions
multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                    X86VectorVTInfo _Src, SDNode OpNodeSAE,
                                    X86FoldableSchedWrite sched> {
  let Uses = [MXCSR] in
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
                           "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                           (_.VT (OpNodeSAE (_.VT _.RC:$src1),
                                            (_Src.VT _Src.RC:$src2)))>,
                           EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
}

// Scalar Conversion with rounding control (RC)
multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                   X86VectorVTInfo _Src, SDNode OpNodeRnd,
                                   X86FoldableSchedWrite sched> {
  let Uses = [MXCSR] in
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
                         "$rc, $src2, $src1", "$src1, $src2, $rc",
                         (_.VT (OpNodeRnd (_.VT _.RC:$src1),
                                          (_Src.VT _Src.RC:$src2), (i32 timm:$rc)))>,
                         EVEX_4V, VEX_LIG, Sched<[sched]>,
                         EVEX_B, EVEX_RC;
}
multiclass avx512_cvt_fp_scalar_sd2ss<bits<8> opc, string OpcodeStr,
                                      SDNode OpNode, SDNode OpNodeRnd,
                                      X86FoldableSchedWrite sched,
                                      X86VectorVTInfo _src, X86VectorVTInfo _dst> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
             avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
                                     OpNodeRnd, sched>, VEX_W, EVEX_CD8<64, CD8VT1>, XD;
  }
}

multiclass avx512_cvt_fp_scalar_ss2sd<bits<8> opc, string OpcodeStr,
                                      SDNode OpNode, SDNode OpNodeSAE,
                                      X86FoldableSchedWrite sched,
                                      X86VectorVTInfo _src, X86VectorVTInfo _dst> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
             avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeSAE, sched>,
             EVEX_CD8<32, CD8VT1>, XS;
  }
}
defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss", X86frounds,
                                            X86froundsRnd, WriteCvtSD2SS, f64x_info,
                                            f32x_info>;
defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd", X86fpexts,
                                            X86fpextsSAE, WriteCvtSS2SD, f32x_info,
                                            f64x_info>;
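// The FRC patterns below pass (IMPLICIT_DEF) as $src1: for plain scalar code
// only the low element of the result matters, so the merged upper bits of the
// destination register are intentionally left undefined.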
def : Pat<(f64 (any_fpextend FR32X:$src)),
          (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>,
          Requires<[HasAVX512]>;
def : Pat<(f64 (any_fpextend (loadf32 addr:$src))),
          (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
          Requires<[HasAVX512, OptForSize]>;

def : Pat<(f32 (any_fpround FR64X:$src)),
          (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
          Requires<[HasAVX512]>;

def : Pat<(v4f32 (X86Movss
                  (v4f32 VR128X:$dst),
                  (v4f32 (scalar_to_vector
                          (f32 (any_fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
          (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
          Requires<[HasAVX512]>;

def : Pat<(v2f64 (X86Movsd
                  (v2f64 VR128X:$dst),
                  (v2f64 (scalar_to_vector
                          (f64 (any_fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
          (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
          Requires<[HasAVX512]>;

//===----------------------------------------------------------------------===//
// AVX-512  Vector convert from signed/unsigned integer to float/double
//          and from float/double to signed/unsigned integer
//===----------------------------------------------------------------------===//

multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                          X86VectorVTInfo _Src, SDNode OpNode,
                          X86FoldableSchedWrite sched,
                          string Broadcast = _.BroadcastStr,
                          string Alias = "", X86MemOperand MemOp = _Src.MemOp,
                          RegisterClass MaskRC = _.KRCWM,
                          dag LdDAG = (_.VT (OpNode (_Src.VT (_Src.LdFrag addr:$src))))> {
let Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm rr : AVX512_maskable_common<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _Src.RC:$src),
                         (ins _.RC:$src0, MaskRC:$mask, _Src.RC:$src),
                         (ins MaskRC:$mask, _Src.RC:$src),
                         OpcodeStr, "$src", "$src",
                         (_.VT (OpNode (_Src.VT _Src.RC:$src))),
                         (vselect MaskRC:$mask,
                                  (_.VT (OpNode (_Src.VT _Src.RC:$src))),
                                  _.RC:$src0),
                         vselect, "$src0 = $dst">,
                         EVEX, Sched<[sched]>;

  defm rm : AVX512_maskable_common<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins MemOp:$src),
                         (ins _.RC:$src0, MaskRC:$mask, MemOp:$src),
                         (ins MaskRC:$mask, MemOp:$src),
                         OpcodeStr#Alias, "$src", "$src",
                         LdDAG,
                         (vselect MaskRC:$mask, LdDAG, _.RC:$src0),
                         vselect, "$src0 = $dst">,
                         EVEX, Sched<[sched.Folded]>;

  defm rmb : AVX512_maskable_common<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _Src.ScalarMemOp:$src),
                         (ins _.RC:$src0, MaskRC:$mask, _Src.ScalarMemOp:$src),
                         (ins MaskRC:$mask, _Src.ScalarMemOp:$src),
                         OpcodeStr,
                         "${src}"##Broadcast, "${src}"##Broadcast,
                         (_.VT (OpNode (_Src.VT
                                        (_Src.BroadcastLdFrag addr:$src))
                               )),
                         (vselect MaskRC:$mask,
                                  (_.VT
                                   (OpNode
                                    (_Src.VT
                                     (_Src.BroadcastLdFrag addr:$src)))),
                                  _.RC:$src0),
                         vselect, "$src0 = $dst">,
                         EVEX, EVEX_B, Sched<[sched.Folded]>;
  }
}
// Conversion with SAE - suppress all exceptions
multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                              X86VectorVTInfo _Src, SDNode OpNodeSAE,
                              X86FoldableSchedWrite sched> {
  let Uses = [MXCSR] in
  defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins _Src.RC:$src), OpcodeStr,
                            "{sae}, $src", "$src, {sae}",
                            (_.VT (OpNodeSAE (_Src.VT _Src.RC:$src)))>,
                            EVEX, EVEX_B, Sched<[sched]>;
}

// Conversion with rounding control (RC)
multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                             X86VectorVTInfo _Src, SDNode OpNodeRnd,
                             X86FoldableSchedWrite sched> {
  let Uses = [MXCSR] in
  defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr,
                            "$rc, $src", "$src, $rc",
                            (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 timm:$rc)))>,
                            EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
}
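// The rrb forms encode a static rounding mode via EVEX.RC; in AT&T assembly
// that appears as an extra operand such as {rn-sae}, {rd-sae}, {ru-sae} or
// {rz-sae}, e.g. (illustrative) "vcvtdq2ps {rd-sae}, %zmm0, %zmm1".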
// Similar to avx512_vcvt_fp, but uses an extload for the memory form.
multiclass avx512_vcvt_fpextend<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                X86VectorVTInfo _Src, SDNode OpNode,
                                X86FoldableSchedWrite sched,
                                string Broadcast = _.BroadcastStr,
                                string Alias = "", X86MemOperand MemOp = _Src.MemOp,
                                RegisterClass MaskRC = _.KRCWM>
  : avx512_vcvt_fp<opc, OpcodeStr, _, _Src, OpNode, sched, Broadcast, Alias,
                   MemOp, MaskRC,
                   (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src))>;

// Extend Float to Double
multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, v8f64_info, v8f32x_info,
                                  any_fpextend, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f32x_info,
                                X86vfpextSAE, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, v2f64x_info, v4f32x_info,
                                     X86any_vfpext, sched.XMM, "{1to2}", "", f64mem>, EVEX_V128;
    defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v4f32x_info, any_fpextend,
                                     sched.YMM>, EVEX_V256;
  }
}

// Truncate Double to Float
multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info, X86any_vfpround, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8f64_info,
                               X86vfproundRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2f64x_info,
                               null_frag, sched.XMM, "{1to2}", "{x}", f128mem, VK2WM>,
                               EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info, X86any_vfpround,
                               sched.YMM, "{1to4}", "{y}">, EVEX_V256;
  }

  def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
                   VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
                   VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
                   VK2WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                   VK2WM:$mask, f64mem:$src), 0, "att">;

  def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
                   VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
                   VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
                   VK4WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
                   VK4WM:$mask, f64mem:$src), 0, "att">;
}

defm VCVTPD2PS : avx512_cvtpd2ps<0x5A, "vcvtpd2ps", SchedWriteCvtPD2PS>,
                                 VEX_W, PD, EVEX_CD8<64, CD8VF>;
defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd", SchedWriteCvtPS2PD>,
                                 PS, EVEX_CD8<32, CD8VH>;

let Predicates = [HasVLX] in {
  // Special patterns to allow use of X86vmfpround for masking. Instruction
  // patterns have been disabled with null_frag.
  def : Pat<(X86any_vfpround (v2f64 VR128X:$src)),
            (VCVTPD2PSZ128rr VR128X:$src)>;
  def : Pat<(X86vmfpround (v2f64 VR128X:$src), (v4f32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTPD2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86vmfpround (v2f64 VR128X:$src), v4f32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTPD2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;

  def : Pat<(X86any_vfpround (loadv2f64 addr:$src)),
            (VCVTPD2PSZ128rm addr:$src)>;
  def : Pat<(X86vmfpround (loadv2f64 addr:$src), (v4f32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTPD2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86vmfpround (loadv2f64 addr:$src), v4f32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTPD2PSZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(X86any_vfpround (v2f64 (X86VBroadcastld64 addr:$src))),
            (VCVTPD2PSZ128rmb addr:$src)>;
  def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
                          (v4f32 VR128X:$src0), VK2WM:$mask),
            (VCVTPD2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
                          v4f32x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTPD2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
}

// Convert Signed/Unsigned Doubleword to Double
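// (No rounding-control or FP-exception behavior here: every i32 value is
// exactly representable in f64, so the conversion below can neither round
// nor fault.)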
let Uses = []<Register>, mayRaiseFPException = 0 in
multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNode128, X86SchedWriteWidths sched> {
  // No rounding in this op
  let Predicates = [HasAVX512] in
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode,
                            sched.ZMM>, EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
                               OpNode128, sched.XMM, "{1to2}", "", i64mem, VK2WM,
                               (v2f64 (OpNode128 (bc_v4i32
                                (v2i64
                                 (scalar_to_vector (loadi64 addr:$src))))))>,
                               EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}

// Convert Signed/Unsigned Doubleword to Float
multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode,
                               sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}

// Convert Float to Signed/Unsigned Doubleword with truncation
multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            SDNode OpNodeSAE, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
                                OpNodeSAE, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
                               sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}

// Convert Float to Signed/Unsigned Doubleword
multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
                               sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}
// Convert Double to Signed/Unsigned Doubleword with truncation
multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            SDNode OpNodeSAE, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
                                OpNodeSAE, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    // We need "x"/"y" suffixes in order to distinguish between 128 and 256
    // memory forms of these instructions in Asm Parser. They have the same
    // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
    // due to the same reason.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
                               null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
                               VK2WM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
                               sched.YMM, "{1to4}", "{y}">, EVEX_V256;
  }

  def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
                   VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
                   VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
                   VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
                   f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
                   VK2WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                   VK2WM:$mask, f64mem:$src), 0, "att">;

  def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
                   VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
                   VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
                   VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
                   f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
                   VK4WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
                   VK4WM:$mask, f64mem:$src), 0, "att">;
}
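// Illustrative example of why the suffixes are needed: with a memory source,
// AT&T "vcvttpd2dq (%rax), %xmm0" is ambiguous, so the assembler requires
//   vcvttpd2dqx (%rax), %xmm0   // reads 128 bits (two f64)
//   vcvttpd2dqy (%rax), %xmm0   // reads 256 bits (four f64)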
// Convert Double to Signed/Unsigned Doubleword
multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    // We need "x"/"y" suffixes in order to distinguish between 128 and 256
    // memory forms of these instructions in Asm Parser. They have the same
    // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
    // due to the same reason.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
                               null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
                               VK2WM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
                               sched.YMM, "{1to4}", "{y}">, EVEX_V256;
  }

  def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
                   VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
                   VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
                   f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
                   VK2WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                   VK2WM:$mask, f64mem:$src), 0, "att">;

  def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
                   VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
                   VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
                   f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
                   VK4WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
                   VK4WM:$mask, f64mem:$src), 0, "att">;
}

// Convert Double to Signed/Unsigned Quadword
multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
                               sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}
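// Note that the quadword conversions in this group are gated on AVX512DQ
// (HasDQI); as the predicate lists show, their 128/256-bit forms additionally
// require AVX512VL.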
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
                                OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
                               sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}

// Convert Signed/Unsigned Quadword to Double
multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode,
                               sched.XMM>, EVEX_V128, NotEVEX2VEXConvertible;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode,
                               sched.YMM>, EVEX_V256, NotEVEX2VEXConvertible;
  }
}

// Convert Float to Signed/Unsigned Quadword
multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    // Explicitly specified broadcast string, since we take only 2 elements
    // from v4f32x_info source
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
                               sched.XMM, "{1to2}", "", f64mem, VK2WM,
                               (v2i64 (OpNode (bc_v4f32
                                (v2f64
                                 (scalar_to_vector (loadf64 addr:$src))))))>,
                               EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}

// Convert Float to Signed/Unsigned Quadword with truncation
multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
                                OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    // Explicitly specified broadcast string, since we take only 2 elements
    // from v4f32x_info source
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
                               sched.XMM, "{1to2}", "", f64mem, VK2WM,
                               (v2i64 (OpNode (bc_v4f32
                                (v2f64
                                 (scalar_to_vector (loadf64 addr:$src))))))>,
                               EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}

// Convert Signed/Unsigned Quadword to Float
multiclass avx512_cvtqq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i64_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8i64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    // We need "x"/"y" suffixes in order to distinguish between 128 and 256
    // memory forms of these instructions in the Asm Parser. They have the same
    // dest type - 'v4f32x_info'. We also specify the broadcast string explicitly
    // due to the same reason.
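    // Illustrative example (assumed syntax, mirroring the cvtpd2dq case above):
    // a plain "vcvtqq2ps (%rax), %xmm0" could name either a 128- or a 256-bit
    // memory source, so AT&T syntax uses "vcvtqq2psx"/"vcvtqq2psy" instead.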
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2i64x_info, null_frag,
                               sched.XMM, "{1to2}", "{x}", i128mem, VK2WM>,
                               EVEX_V128, NotEVEX2VEXConvertible;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i64x_info, OpNode,
                               sched.YMM, "{1to4}", "{y}">, EVEX_V256,
                               NotEVEX2VEXConvertible;
  }

  def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
                  VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
                  i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
                  VK2WM:$mask, i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                  VK2WM:$mask, i64mem:$src), 0, "att">;

  def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
                  VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}}|"
                  "$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
                  i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
                  VK4WM:$mask, i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
                  VK4WM:$mask, i64mem:$src), 0, "att">;
}

defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", any_sint_to_fp, X86any_VSintToFP,
                                 SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>;

defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", any_sint_to_fp,
                                 X86VSintToFpRnd, SchedWriteCvtDQ2PS>,
                                 PS, EVEX_CD8<32, CD8VF>;

defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86any_cvttp2si,
                                   X86cvttp2siSAE, SchedWriteCvtPS2DQ>,
                                   XS, EVEX_CD8<32, CD8VF>;

defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86any_cvttp2si,
                                   X86cvttp2siSAE, SchedWriteCvtPD2DQ>,
                                   PD, VEX_W, EVEX_CD8<64, CD8VF>;

defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86any_cvttp2ui,
                                    X86cvttp2uiSAE, SchedWriteCvtPS2DQ>, PS,
                                    EVEX_CD8<32, CD8VF>;
defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86any_cvttp2ui,
                                    X86cvttp2uiSAE, SchedWriteCvtPD2DQ>,
                                    PS, VEX_W, EVEX_CD8<64, CD8VF>;

defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", any_uint_to_fp,
                                  X86any_VUintToFP, SchedWriteCvtDQ2PD>, XS,
                                  EVEX_CD8<32, CD8VH>;

defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", any_uint_to_fp,
                                  X86VUintToFpRnd, SchedWriteCvtDQ2PS>, XD,
                                  EVEX_CD8<32, CD8VF>;

defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int,
                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
                                 EVEX_CD8<32, CD8VF>;

defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int,
                                 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, XD,
                                 VEX_W, EVEX_CD8<64, CD8VF>;

defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt,
                                  X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>,
                                  PS, EVEX_CD8<32, CD8VF>;

defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt,
                                  X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
                                  PS, EVEX_CD8<64, CD8VF>;

defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int,
                                 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, VEX_W,
                                 PD, EVEX_CD8<64, CD8VF>;

defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int,
                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
                                 EVEX_CD8<32, CD8VH>;

defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt,
                                  X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
                                  PD, EVEX_CD8<64, CD8VF>;

defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt,
                                  X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, PD,
                                  EVEX_CD8<32, CD8VH>;

defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86any_cvttp2si,
                                   X86cvttp2siSAE, SchedWriteCvtPD2DQ>, VEX_W,
                                   PD, EVEX_CD8<64, CD8VF>;

defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86any_cvttp2si,
                                   X86cvttp2siSAE, SchedWriteCvtPS2DQ>, PD,
                                   EVEX_CD8<32, CD8VH>;

defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86any_cvttp2ui,
                                    X86cvttp2uiSAE, SchedWriteCvtPD2DQ>, VEX_W,
                                    PD, EVEX_CD8<64, CD8VF>;

defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86any_cvttp2ui,
                                    X86cvttp2uiSAE, SchedWriteCvtPS2DQ>, PD,
                                    EVEX_CD8<32, CD8VH>;

defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", any_sint_to_fp,
                                 X86VSintToFpRnd, SchedWriteCvtDQ2PD>, VEX_W, XS,
                                 EVEX_CD8<64, CD8VF>;

defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", any_uint_to_fp,
                                  X86VUintToFpRnd, SchedWriteCvtDQ2PD>, VEX_W, XS,
                                  EVEX_CD8<64, CD8VF>;

defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", any_sint_to_fp,
                                 X86VSintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, PS,
                                 EVEX_CD8<64, CD8VF>;

defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", any_uint_to_fp,
                                  X86VUintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, XD,
                                  EVEX_CD8<64, CD8VF>;

let Predicates = [HasVLX] in {
  // Special patterns to allow use of X86mcvtp2Int for masking. Instruction
  // patterns have been disabled with null_frag.
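  // (The Z128 forms above were instantiated with null_frag, so no patterns
  // are attached to the instructions themselves; the unmasked and
  // write-masked forms are matched here instead, with the X86mcvtp2Int node
  // carrying the passthru operand and the mask.)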
  def : Pat<(v4i32 (X86cvtp2Int (v2f64 VR128X:$src))),
            (VCVTPD2DQZ128rr VR128X:$src)>;
  def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;

  def : Pat<(v4i32 (X86cvtp2Int (loadv2f64 addr:$src))),
            (VCVTPD2DQZ128rm addr:$src)>;
  def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v4i32 (X86cvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)))),
            (VCVTPD2DQZ128rmb addr:$src)>;
  def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
                          (v4i32 VR128X:$src0), VK2WM:$mask),
            (VCVTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;

  // Special patterns to allow use of X86mcvttp2si for masking. Instruction
  // patterns have been disabled with null_frag.
  def : Pat<(v4i32 (X86any_cvttp2si (v2f64 VR128X:$src))),
            (VCVTTPD2DQZ128rr VR128X:$src)>;
  def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;

  def : Pat<(v4i32 (X86any_cvttp2si (loadv2f64 addr:$src))),
            (VCVTTPD2DQZ128rm addr:$src)>;
  def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v4i32 (X86any_cvttp2si (v2f64 (X86VBroadcastld64 addr:$src)))),
            (VCVTTPD2DQZ128rmb addr:$src)>;
  def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
                          (v4i32 VR128X:$src0), VK2WM:$mask),
            (VCVTTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;

  // Special patterns to allow use of X86mcvtp2UInt for masking. Instruction
  // patterns have been disabled with null_frag.
  def : Pat<(v4i32 (X86cvtp2UInt (v2f64 VR128X:$src))),
            (VCVTPD2UDQZ128rr VR128X:$src)>;
  def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;

  def : Pat<(v4i32 (X86cvtp2UInt (loadv2f64 addr:$src))),
            (VCVTPD2UDQZ128rm addr:$src)>;
  def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v4i32 (X86cvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)))),
            (VCVTPD2UDQZ128rmb addr:$src)>;
  def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
                           (v4i32 VR128X:$src0), VK2WM:$mask),
            (VCVTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
                           v4i32x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;

  // Special patterns to allow use of X86mcvttp2ui for masking. Instruction
  // patterns have been disabled with null_frag.
  def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 VR128X:$src))),
            (VCVTTPD2UDQZ128rr VR128X:$src)>;
  def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;

  def : Pat<(v4i32 (X86any_cvttp2ui (loadv2f64 addr:$src))),
            (VCVTTPD2UDQZ128rm addr:$src)>;
  def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)))),
            (VCVTTPD2UDQZ128rmb addr:$src)>;
  def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
                          (v4i32 VR128X:$src0), VK2WM:$mask),
            (VCVTTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
}

let Predicates = [HasDQI, HasVLX] in {
  def : Pat<(v2i64 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
            (VCVTPS2QQZ128rm addr:$src)>;
  def : Pat<(v2i64 (vselect VK2WM:$mask,
                            (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                            VR128X:$src0)),
            (VCVTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(v2i64 (vselect VK2WM:$mask,
                            (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                            v2i64x_info.ImmAllZerosV)),
            (VCVTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v2i64 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
            (VCVTPS2UQQZ128rm addr:$src)>;
  def : Pat<(v2i64 (vselect VK2WM:$mask,
                            (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                            VR128X:$src0)),
            (VCVTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(v2i64 (vselect VK2WM:$mask,
                            (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                            v2i64x_info.ImmAllZerosV)),
            (VCVTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v2i64 (X86any_cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
            (VCVTTPS2QQZ128rm addr:$src)>;
  def : Pat<(v2i64 (vselect VK2WM:$mask,
                            (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                            VR128X:$src0)),
            (VCVTTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(v2i64 (vselect VK2WM:$mask,
                            (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                            v2i64x_info.ImmAllZerosV)),
            (VCVTTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v2i64 (X86any_cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
            (VCVTTPS2UQQZ128rm addr:$src)>;
  def : Pat<(v2i64 (vselect VK2WM:$mask,
                            (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                            VR128X:$src0)),
            (VCVTTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(v2i64 (vselect VK2WM:$mask,
                            (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                            v2i64x_info.ImmAllZerosV)),
            (VCVTTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
}

let Predicates = [HasVLX] in {
  def : Pat<(v2f64 (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
            (VCVTDQ2PDZ128rm addr:$src)>;
  def : Pat<(v2f64 (vselect VK2WM:$mask,
                            (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
                            VR128X:$src0)),
            (VCVTDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(v2f64 (vselect VK2WM:$mask,
                            (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
                            v2f64x_info.ImmAllZerosV)),
            (VCVTDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v2f64 (X86any_VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
            (VCVTUDQ2PDZ128rm addr:$src)>;
  def : Pat<(v2f64 (vselect VK2WM:$mask,
                            (X86any_VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
                            VR128X:$src0)),
            (VCVTUDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(v2f64 (vselect VK2WM:$mask,
                            (X86any_VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
                            v2f64x_info.ImmAllZerosV)),
            (VCVTUDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
}

let Predicates = [HasDQI, HasVLX] in {
  // Special patterns to allow use of X86VMSintToFP for masking. Instruction
  // patterns have been disabled with null_frag.
  def : Pat<(v4f32 (X86any_VSintToFP (v2i64 VR128X:$src))),
            (VCVTQQ2PSZ128rr VR128X:$src)>;
  def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), (v4f32 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTQQ2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), v4f32x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTQQ2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;

  def : Pat<(v4f32 (X86any_VSintToFP (loadv2i64 addr:$src))),
            (VCVTQQ2PSZ128rm addr:$src)>;
  def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), (v4f32 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTQQ2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), v4f32x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTQQ2PSZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v4f32 (X86any_VSintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
            (VCVTQQ2PSZ128rmb addr:$src)>;
  def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
                           (v4f32 VR128X:$src0), VK2WM:$mask),
            (VCVTQQ2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
                           v4f32x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTQQ2PSZ128rmbkz VK2WM:$mask, addr:$src)>;

  // Special patterns to allow use of X86VMUintToFP for masking. Instruction
  // patterns have been disabled with null_frag.
  def : Pat<(v4f32 (X86any_VUintToFP (v2i64 VR128X:$src))),
            (VCVTUQQ2PSZ128rr VR128X:$src)>;
  def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), (v4f32 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTUQQ2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), v4f32x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTUQQ2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;

  def : Pat<(v4f32 (X86any_VUintToFP (loadv2i64 addr:$src))),
            (VCVTUQQ2PSZ128rm addr:$src)>;
  def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), (v4f32 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTUQQ2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), v4f32x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTUQQ2PSZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v4f32 (X86any_VUintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
            (VCVTUQQ2PSZ128rmb addr:$src)>;
  def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
                           (v4f32 VR128X:$src0), VK2WM:$mask),
            (VCVTUQQ2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
                           v4f32x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTUQQ2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
}

//===----------------------------------------------------------------------===//
// Half precision conversion instructions
//===----------------------------------------------------------------------===//

let Uses = [MXCSR], mayRaiseFPException = 1 in
multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                           X86MemOperand x86memop, PatFrag ld_frag,
                           X86FoldableSchedWrite sched> {
  defm rr : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
                            (ins _src.RC:$src), "vcvtph2ps", "$src", "$src",
                            (X86cvtph2ps (_src.VT _src.RC:$src))>,
                            T8PD, Sched<[sched]>;
  defm rm : AVX512_maskable<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
                            (ins x86memop:$src), "vcvtph2ps", "$src", "$src",
                            (X86cvtph2ps (_src.VT
                                          (ld_frag addr:$src)))>,
                            T8PD, Sched<[sched.Folded]>;
}
multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                               X86FoldableSchedWrite sched> {
  let Uses = [MXCSR] in
  defm rrb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
                             (ins _src.RC:$src), "vcvtph2ps",
                             "{sae}, $src", "$src, {sae}",
                             (X86cvtph2psSAE (_src.VT _src.RC:$src))>,
                             T8PD, EVEX_B, Sched<[sched]>;
}

let Predicates = [HasAVX512] in
  defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem, load,
                                    WriteCvtPH2PSZ>,
                    avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSZ>,
                    EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;

let Predicates = [HasVLX] in {
  defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
                                       load, WriteCvtPH2PSY>, EVEX, EVEX_V256,
                                       EVEX_CD8<32, CD8VH>;
  defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
                                       load, WriteCvtPH2PS>, EVEX, EVEX_V128,
                                       EVEX_CD8<32, CD8VH>;

  // Pattern match vcvtph2ps of a scalar i64 load.
  def : Pat<(v4f32 (X86cvtph2ps (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
            (VCVTPH2PSZ128rm addr:$src)>;
  def : Pat<(v4f32 (X86cvtph2ps (v8i16 (bitconvert
              (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
            (VCVTPH2PSZ128rm addr:$src)>;
}

multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                           X86MemOperand x86memop, SchedWrite RR, SchedWrite MR> {
let ExeDomain = GenericDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  def rr : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
             (ins _src.RC:$src1, i32u8imm:$src2),
             "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
             [(set _dest.RC:$dst,
                   (X86cvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>,
             Sched<[RR]>;
  let Constraints = "$src0 = $dst" in
  def rrk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
             (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
             "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
             [(set _dest.RC:$dst,
                   (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
                                 _dest.RC:$src0, _src.KRCWM:$mask))]>,
             Sched<[RR]>, EVEX_K;
  def rrkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
             (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
             "vcvtps2ph\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}",
             [(set _dest.RC:$dst,
                   (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
                                 _dest.ImmAllZerosV, _src.KRCWM:$mask))]>,
             Sched<[RR]>, EVEX_KZ;
  let hasSideEffects = 0, mayStore = 1 in {
    def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
               (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
               "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
               Sched<[MR]>;
    def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
               (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
               "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", []>,
               EVEX_K, Sched<[MR]>, NotMemoryFoldable;
  }
}
}

multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                               SchedWrite Sched> {
  let hasSideEffects = 0, Uses = [MXCSR] in
  defm rrb : AVX512_maskable_in_asm<0x1D, MRMDestReg, _dest,
                   (outs _dest.RC:$dst),
                   (ins _src.RC:$src1, i32u8imm:$src2),
                   "vcvtps2ph", "$src2, {sae}, $src1", "$src1, {sae}, $src2", []>,
                   EVEX_B, AVX512AIi8Base, Sched<[Sched]>;
}
let Predicates = [HasAVX512] in {
  defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem,
                                    WriteCvtPS2PHZ, WriteCvtPS2PHZSt>,
                                    avx512_cvtps2ph_sae<v16i16x_info, v16f32_info, WriteCvtPS2PHZ>,
                                    EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
  let Predicates = [HasVLX] in {
    defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem,
                                         WriteCvtPS2PHY, WriteCvtPS2PHYSt>,
                                         EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
    defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem,
                                         WriteCvtPS2PH, WriteCvtPS2PHSt>,
                                         EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
  }

  def : Pat<(store (f64 (extractelt
                         (bc_v2f64 (v8i16 (X86cvtps2ph VR128X:$src1, timm:$src2))),
                         (iPTR 0))), addr:$dst),
            (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
  def : Pat<(store (i64 (extractelt
                         (bc_v2i64 (v8i16 (X86cvtps2ph VR128X:$src1, timm:$src2))),
                         (iPTR 0))), addr:$dst),
            (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
  def : Pat<(store (v8i16 (X86cvtps2ph VR256X:$src1, timm:$src2)), addr:$dst),
            (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, timm:$src2)>;
  def : Pat<(store (v16i16 (X86cvtps2ph VR512:$src1, timm:$src2)), addr:$dst),
            (VCVTPS2PHZmr addr:$dst, VR512:$src1, timm:$src2)>;
}

// Patterns for matching conversions from float to half-float and vice versa.
let Predicates = [HasVLX] in {
  // Use MXCSR.RC for rounding instead of explicitly specifying the default
  // rounding mode (Nearest-Even, encoded as 0). Both are equivalent in the
  // configurations we support (the default). However, falling back to MXCSR is
  // more consistent with other instructions, which are always controlled by it.
  // It's encoded as 0b100.
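  // (Concretely: the vcvtps2ph immediate used in the patterns below is 4,
  // i.e. 0b100 - bit 2 of the immediate selects "round using MXCSR.RC", and
  // the explicit rounding-control bits 1:0 are then ignored.)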
  def : Pat<(fp_to_f16 FR32X:$src),
            (i16 (EXTRACT_SUBREG (VMOVPDI2DIZrr (v8i16 (VCVTPS2PHZ128rr
              (v4f32 (COPY_TO_REGCLASS FR32X:$src, VR128X)), 4))), sub_16bit))>;

  def : Pat<(f16_to_fp GR16:$src),
            (f32 (COPY_TO_REGCLASS (v4f32 (VCVTPH2PSZ128rr
              (v8i16 (COPY_TO_REGCLASS (MOVSX32rr16 GR16:$src), VR128X)))), FR32X))>;

  def : Pat<(f16_to_fp (i16 (fp_to_f16 FR32X:$src))),
            (f32 (COPY_TO_REGCLASS (v4f32 (VCVTPH2PSZ128rr
              (v8i16 (VCVTPS2PHZ128rr
                (v4f32 (COPY_TO_REGCLASS FR32X:$src, VR128X)), 4)))), FR32X))>;
}

// Unordered/Ordered scalar fp compare with Sae and set EFLAGS
multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
                              string OpcodeStr, Domain d,
                              X86FoldableSchedWrite sched = WriteFCom> {
  let hasSideEffects = 0, Uses = [MXCSR] in
  def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
                  !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"), []>,
                  EVEX, EVEX_B, VEX_LIG, EVEX_V128, Sched<[sched]>;
}

let Defs = [EFLAGS], Predicates = [HasAVX512] in {
  defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", SSEPackedSingle>,
                                      AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
  defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", SSEPackedDouble>,
                                      AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
  defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", SSEPackedSingle>,
                                     AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
  defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", SSEPackedDouble>,
                                     AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
}

let Defs = [EFLAGS], Predicates = [HasAVX512] in {
  defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86any_fcmp, f32, f32mem, loadf32,
                                 "ucomiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
                                 EVEX_CD8<32, CD8VT1>;
  defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86any_fcmp, f64, f64mem, loadf64,
                                 "ucomisd", SSEPackedDouble>, PD, EVEX,
                                 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
  defm VCOMISSZ : sse12_ord_cmp<0x2F, FR32X, X86strict_fcmps, f32, f32mem, loadf32,
                                "comiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
                                EVEX_CD8<32, CD8VT1>;
  defm VCOMISDZ : sse12_ord_cmp<0x2F, FR64X, X86strict_fcmps, f64, f64mem, loadf64,
                                "comisd", SSEPackedDouble>, PD, EVEX,
                                VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
  let isCodeGenOnly = 1 in {
    defm VUCOMISSZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
                        sse_load_f32, "ucomiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
                        EVEX_CD8<32, CD8VT1>;
    defm VUCOMISDZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
                        sse_load_f64, "ucomisd", SSEPackedDouble>, PD, EVEX,
                        VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;

    defm VCOMISSZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
                        sse_load_f32, "comiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
                        EVEX_CD8<32, CD8VT1>;
    defm VCOMISDZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
                        sse_load_f64, "comisd", SSEPackedDouble>, PD, EVEX,
                        VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
  }
}

/// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd
multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let Predicates = [HasAVX512], ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
  defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
$src1", "$src1, $src2", 8610 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>, 8611 EVEX_4V, VEX_LIG, Sched<[sched]>; 8612 defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), 8613 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr, 8614 "$src2, $src1", "$src1, $src2", 8615 (OpNode (_.VT _.RC:$src1), 8616 _.ScalarIntMemCPat:$src2)>, EVEX_4V, VEX_LIG, 8617 Sched<[sched.Folded, sched.ReadAfterFold]>; 8618} 8619} 8620 8621defm VRCP14SSZ : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SchedWriteFRcp.Scl, 8622 f32x_info>, EVEX_CD8<32, CD8VT1>, 8623 T8PD; 8624defm VRCP14SDZ : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SchedWriteFRcp.Scl, 8625 f64x_info>, VEX_W, EVEX_CD8<64, CD8VT1>, 8626 T8PD; 8627defm VRSQRT14SSZ : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s, 8628 SchedWriteFRsqrt.Scl, f32x_info>, 8629 EVEX_CD8<32, CD8VT1>, T8PD; 8630defm VRSQRT14SDZ : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s, 8631 SchedWriteFRsqrt.Scl, f64x_info>, VEX_W, 8632 EVEX_CD8<64, CD8VT1>, T8PD; 8633 8634/// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd 8635multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode, 8636 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 8637 let ExeDomain = _.ExeDomain in { 8638 defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 8639 (ins _.RC:$src), OpcodeStr, "$src", "$src", 8640 (_.VT (OpNode _.RC:$src))>, EVEX, T8PD, 8641 Sched<[sched]>; 8642 defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 8643 (ins _.MemOp:$src), OpcodeStr, "$src", "$src", 8644 (OpNode (_.VT 8645 (bitconvert (_.LdFrag addr:$src))))>, EVEX, T8PD, 8646 Sched<[sched.Folded, sched.ReadAfterFold]>; 8647 defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 8648 (ins _.ScalarMemOp:$src), OpcodeStr, 8649 "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr, 8650 (OpNode (_.VT 8651 (_.BroadcastLdFrag addr:$src)))>, 8652 EVEX, T8PD, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 8653 } 8654} 8655 8656let Uses = [MXCSR] in 8657multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode, 8658 X86SchedWriteWidths sched> { 8659 defm PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"), OpNode, sched.ZMM, 8660 v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>; 8661 defm PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"), OpNode, sched.ZMM, 8662 v8f64_info>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; 8663 8664 // Define only if AVX512VL feature is present. 
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
                                OpNode, sched.XMM, v4f32x_info>,
                                EVEX_V128, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
                                OpNode, sched.YMM, v8f32x_info>,
                                EVEX_V256, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
                                OpNode, sched.XMM, v2f64x_info>,
                                EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
                                OpNode, sched.YMM, v4f64x_info>,
                                EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
  }
}

defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86rsqrt14, SchedWriteFRsqrt>;
defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86rcp14, SchedWriteFRcp>;

/// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                         SDNode OpNode, SDNode OpNodeSAE,
                         X86FoldableSchedWrite sched> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
  defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
                           Sched<[sched]>, SIMD_EXC;

  defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                            "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                            (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
                            EVEX_B, Sched<[sched]>;

  defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2)>,
                         Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  }
}

multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
                        SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
  defm SSZ : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, OpNodeSAE,
                           sched>, EVEX_CD8<32, CD8VT1>, VEX_LIG;
  defm SDZ : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, OpNodeSAE,
                           sched>, EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
}

let Predicates = [HasERI] in {
  defm VRCP28 : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, X86rcp28SAEs,
                             SchedWriteFRcp.Scl>, T8PD, EVEX_4V;
  defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, X86rsqrt28SAEs,
                               SchedWriteFRsqrt.Scl>, T8PD, EVEX_4V;
}

defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
                            SchedWriteFRnd.Scl>, T8PD, EVEX_4V;
/// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd

multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         SDNode OpNode, X86FoldableSchedWrite sched> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src), OpcodeStr, "$src", "$src",
                         (OpNode (_.VT _.RC:$src))>,
                         Sched<[sched]>;

  defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
                         (OpNode (_.VT
                             (bitconvert (_.LdFrag addr:$src))))>,
                         Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.ScalarMemOp:$src), OpcodeStr,
                         "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
                         (OpNode (_.VT
                             (_.BroadcastLdFrag addr:$src)))>,
                         EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
multiclass avx512_fp28_p_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                             SDNode OpNode, X86FoldableSchedWrite sched> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
  defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src), OpcodeStr,
                         "{sae}, $src", "$src, {sae}",
                         (OpNode (_.VT _.RC:$src))>,
                         EVEX_B, Sched<[sched]>;
}

multiclass avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
                      SDNode OpNodeSAE, X86SchedWriteWidths sched> {
   defm PSZ : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>,
              avx512_fp28_p_sae<opc, OpcodeStr#"ps", v16f32_info, OpNodeSAE, sched.ZMM>,
              T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
   defm PDZ : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>,
              avx512_fp28_p_sae<opc, OpcodeStr#"pd", v8f64_info, OpNodeSAE, sched.ZMM>,
              T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
}

multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode, X86SchedWriteWidths sched> {
  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode,
                                sched.XMM>,
                                EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode,
                                sched.YMM>,
                                EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode,
                                sched.XMM>,
                                EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode,
                                sched.YMM>,
                                EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
  }
}

let Predicates = [HasERI] in {
  defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, X86rsqrt28SAE,
                             SchedWriteFRsqrt>, EVEX;
  defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28, X86rcp28SAE,
                           SchedWriteFRcp>, EVEX;
  defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2, X86exp2SAE,
                          SchedWriteFAdd>, EVEX;
}
defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
                          SchedWriteFRnd>,
               avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexp,
                                        SchedWriteFRnd>, EVEX;

multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
                                    X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  let ExeDomain = _.ExeDomain in
  defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc",
                         (_.VT (X86fsqrtRnd _.RC:$src, (i32 timm:$rc)))>,
                         EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
}

multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
                              X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src), OpcodeStr, "$src", "$src",
                         (_.VT (any_fsqrt _.RC:$src))>, EVEX,
                         Sched<[sched]>;
  defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
                         (any_fsqrt (_.VT
                             (bitconvert (_.LdFrag addr:$src))))>, EVEX,
                         Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                          (ins _.ScalarMemOp:$src), OpcodeStr,
                          "${src}"##_.BroadcastStr,
"${src}"##_.BroadcastStr, 8826 (any_fsqrt (_.VT 8827 (_.BroadcastLdFrag addr:$src)))>, 8828 EVEX, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 8829 } 8830} 8831 8832let Uses = [MXCSR], mayRaiseFPException = 1 in 8833multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr, 8834 X86SchedWriteSizes sched> { 8835 defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"), 8836 sched.PS.ZMM, v16f32_info>, 8837 EVEX_V512, PS, EVEX_CD8<32, CD8VF>; 8838 defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"), 8839 sched.PD.ZMM, v8f64_info>, 8840 EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>; 8841 // Define only if AVX512VL feature is present. 8842 let Predicates = [HasVLX] in { 8843 defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"), 8844 sched.PS.XMM, v4f32x_info>, 8845 EVEX_V128, PS, EVEX_CD8<32, CD8VF>; 8846 defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"), 8847 sched.PS.YMM, v8f32x_info>, 8848 EVEX_V256, PS, EVEX_CD8<32, CD8VF>; 8849 defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"), 8850 sched.PD.XMM, v2f64x_info>, 8851 EVEX_V128, VEX_W, PD, EVEX_CD8<64, CD8VF>; 8852 defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"), 8853 sched.PD.YMM, v4f64x_info>, 8854 EVEX_V256, VEX_W, PD, EVEX_CD8<64, CD8VF>; 8855 } 8856} 8857 8858let Uses = [MXCSR] in 8859multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr, 8860 X86SchedWriteSizes sched> { 8861 defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"), 8862 sched.PS.ZMM, v16f32_info>, 8863 EVEX_V512, PS, EVEX_CD8<32, CD8VF>; 8864 defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"), 8865 sched.PD.ZMM, v8f64_info>, 8866 EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>; 8867} 8868 8869multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched, 8870 X86VectorVTInfo _, string Name> { 8871 let ExeDomain = _.ExeDomain in { 8872 defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 8873 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 8874 "$src2, $src1", "$src1, $src2", 8875 (X86fsqrts (_.VT _.RC:$src1), 8876 (_.VT _.RC:$src2))>, 8877 Sched<[sched]>, SIMD_EXC; 8878 defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), 8879 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr, 8880 "$src2, $src1", "$src1, $src2", 8881 (X86fsqrts (_.VT _.RC:$src1), 8882 _.ScalarIntMemCPat:$src2)>, 8883 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; 8884 let Uses = [MXCSR] in 8885 defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 8886 (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr, 8887 "$rc, $src2, $src1", "$src1, $src2, $rc", 8888 (X86fsqrtRnds (_.VT _.RC:$src1), 8889 (_.VT _.RC:$src2), 8890 (i32 timm:$rc))>, 8891 EVEX_B, EVEX_RC, Sched<[sched]>; 8892 8893 let isCodeGenOnly = 1, hasSideEffects = 0, Predicates=[HasAVX512] in { 8894 def r : I<opc, MRMSrcReg, (outs _.FRC:$dst), 8895 (ins _.FRC:$src1, _.FRC:$src2), 8896 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, 8897 Sched<[sched]>, SIMD_EXC; 8898 let mayLoad = 1 in 8899 def m : I<opc, MRMSrcMem, (outs _.FRC:$dst), 8900 (ins _.FRC:$src1, _.ScalarMemOp:$src2), 8901 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, 8902 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; 8903 } 8904 } 8905 8906 let Predicates = [HasAVX512] in { 8907 def : Pat<(_.EltVT (any_fsqrt _.FRC:$src)), 8908 (!cast<Instruction>(Name#Zr) 8909 (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>; 8910 } 8911 8912 let Predicates = 
  let Predicates = [HasAVX512, OptForSize] in {
    def : Pat<(_.EltVT (any_fsqrt (load addr:$src))),
              (!cast<Instruction>(Name#Zm)
                  (_.EltVT (IMPLICIT_DEF)), addr:$src)>;
  }
}

multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr,
                                  X86SchedWriteSizes sched> {
  defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", sched.PS.Scl, f32x_info, NAME#"SS">,
                                EVEX_CD8<32, CD8VT1>, EVEX_4V, XS;
  defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", sched.PD.Scl, f64x_info, NAME#"SD">,
                                EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W;
}

defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", SchedWriteFSqrtSizes>,
             avx512_sqrt_packed_all_round<0x51, "vsqrt", SchedWriteFSqrtSizes>;

defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, VEX_LIG;

multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
                         "$src3, $src2, $src1", "$src1, $src2, $src3",
                         (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                             (i32 timm:$src3)))>,
                         Sched<[sched]>, SIMD_EXC;

    let Uses = [MXCSR] in
    defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
                         "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
                         (_.VT (X86RndScalesSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                                (i32 timm:$src3)))>, EVEX_B,
                         Sched<[sched]>;

    defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
                         OpcodeStr,
                         "$src3, $src2, $src1", "$src1, $src2, $src3",
                         (_.VT (X86RndScales _.RC:$src1,
                                             _.ScalarIntMemCPat:$src2, (i32 timm:$src3)))>,
                         Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;

    let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in {
      def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
                (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
                OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                []>, Sched<[sched]>, SIMD_EXC;

      let mayLoad = 1 in
        def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
                  (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
                  OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                  []>, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
    }
  }

  let Predicates = [HasAVX512] in {
    def : Pat<(X86any_VRndScale _.FRC:$src1, timm:$src2),
              (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
               _.FRC:$src1, timm:$src2))>;
  }

  let Predicates = [HasAVX512, OptForSize] in {
    def : Pat<(X86any_VRndScale (_.ScalarLdFrag addr:$src1), timm:$src2),
              (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
               addr:$src1, timm:$src2))>;
  }
}

defm VRNDSCALESSZ : avx512_rndscale_scalar<0x0A, "vrndscaless",
                                           SchedWriteFRnd.Scl, f32x_info>,
                                           AVX512AIi8Base, EVEX_4V, VEX_LIG,
                                           EVEX_CD8<32, CD8VT1>;

defm VRNDSCALESDZ : avx512_rndscale_scalar<0x0B, "vrndscalesd",
                                           SchedWriteFRnd.Scl, f64x_info>,
                                           VEX_W, AVX512AIi8Base, EVEX_4V, VEX_LIG,
                                           EVEX_CD8<64, CD8VT1>;

multiclass avx512_masked_scalar<SDNode OpNode, string OpcPrefix, SDNode Move,
                                dag Mask, X86VectorVTInfo _, PatLeaf ZeroFP,
                                dag OutMask, Predicate BasePredicate> {
  let Predicates = [BasePredicate] in {
    def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects Mask,
               (OpNode (extractelt _.VT:$src2, (iPTR 0))),
               (extractelt _.VT:$dst, (iPTR 0))))),
              (!cast<Instruction>("V"#OpcPrefix#r_Intk)
               _.VT:$dst, OutMask, _.VT:$src2, _.VT:$src1)>;

    def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects Mask,
               (OpNode (extractelt _.VT:$src2, (iPTR 0))),
               ZeroFP))),
              (!cast<Instruction>("V"#OpcPrefix#r_Intkz)
               OutMask, _.VT:$src2, _.VT:$src1)>;
  }
}

defm : avx512_masked_scalar<fsqrt, "SQRTSSZ", X86Movss,
                            (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v4f32x_info,
                            fp32imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
defm : avx512_masked_scalar<fsqrt, "SQRTSDZ", X86Movsd,
                            (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v2f64x_info,
                            fp64imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;


//-------------------------------------------------
// Integer truncate and extend operations
//-------------------------------------------------

// PatFrags that contain a select and a truncate op. They take operands in the
// same order as X86vmtrunc, X86vmtruncs, X86vmtruncus. This allows us to pass
// either to the multiclasses.
def select_trunc : PatFrag<(ops node:$src, node:$src0, node:$mask),
                           (vselect node:$mask,
                                    (trunc node:$src), node:$src0)>;
def select_truncs : PatFrag<(ops node:$src, node:$src0, node:$mask),
                            (vselect node:$mask,
                                     (X86vtruncs node:$src), node:$src0)>;
def select_truncus : PatFrag<(ops node:$src, node:$src0, node:$mask),
                             (vselect node:$mask,
                                      (X86vtruncus node:$src), node:$src0)>;

multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               SDPatternOperator MaskNode,
                               X86FoldableSchedWrite sched, X86VectorVTInfo SrcInfo,
                               X86VectorVTInfo DestInfo, X86MemOperand x86memop> {
  let ExeDomain = DestInfo.ExeDomain in {
  def rr : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
             (ins SrcInfo.RC:$src),
             OpcodeStr # "\t{$src, $dst|$dst, $src}",
             [(set DestInfo.RC:$dst,
               (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src))))]>,
             EVEX, Sched<[sched]>;
  let Constraints = "$src0 = $dst" in
  def rrk : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
             (ins DestInfo.RC:$src0, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
             OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
             [(set DestInfo.RC:$dst,
               (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
                         (DestInfo.VT DestInfo.RC:$src0),
                         SrcInfo.KRCWM:$mask))]>,
             EVEX, EVEX_K, Sched<[sched]>;
  def rrkz : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
             (ins SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
             OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
             [(set DestInfo.RC:$dst,
               (DestInfo.VT (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
                             DestInfo.ImmAllZerosV, SrcInfo.KRCWM:$mask)))]>,
             EVEX, EVEX_KZ, Sched<[sched]>;
  }

  let mayStore = 1, hasSideEffects = 0, ExeDomain = DestInfo.ExeDomain in {
    def mr : AVX512XS8I<opc, MRMDestMem, (outs),
               (ins x86memop:$dst, SrcInfo.RC:$src),
               OpcodeStr # "\t{$src, $dst|$dst, $src}", []>,
               EVEX, Sched<[sched.Folded]>;

    def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
               (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
               OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", []>,
               EVEX, EVEX_K, Sched<[sched.Folded]>, NotMemoryFoldable;
  }//mayStore = 1, hasSideEffects = 0
}

multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
                                    X86VectorVTInfo DestInfo,
                                    PatFrag truncFrag, PatFrag mtruncFrag,
                                    string Name> {

  def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst),
            (!cast<Instruction>(Name#SrcInfo.ZSuffix##mr)
                                addr:$dst, SrcInfo.RC:$src)>;

  def : Pat<(mtruncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst,
                        SrcInfo.KRCWM:$mask),
            (!cast<Instruction>(Name#SrcInfo.ZSuffix##mrk)
                                addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
}

multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128,
                        SDNode OpNode256, SDNode OpNode512,
                        SDPatternOperator MaskNode128,
                        SDPatternOperator MaskNode256,
                        SDPatternOperator MaskNode512,
                        X86FoldableSchedWrite sched,
                        AVX512VLVectorVTInfo VTSrcInfo,
                        X86VectorVTInfo DestInfoZ128,
                        X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
                        X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
                        X86MemOperand x86memopZ, PatFrag truncFrag,
                        PatFrag mtruncFrag, Predicate prd = HasAVX512>{

  let Predicates = [HasVLX, prd] in {
    defm Z128: avx512_trunc_common<opc, OpcodeStr, OpNode128, MaskNode128, sched,
                                   VTSrcInfo.info128, DestInfoZ128, x86memopZ128>,
               avx512_trunc_mr_lowering<VTSrcInfo.info128, DestInfoZ128,
                                        truncFrag, mtruncFrag, NAME>, EVEX_V128;

    defm Z256: avx512_trunc_common<opc, OpcodeStr, OpNode256, MaskNode256, sched,
                                   VTSrcInfo.info256, DestInfoZ256, x86memopZ256>,
               avx512_trunc_mr_lowering<VTSrcInfo.info256, DestInfoZ256,
                                        truncFrag, mtruncFrag, NAME>, EVEX_V256;
  }
  let Predicates = [prd] in
    defm Z: avx512_trunc_common<opc, OpcodeStr, OpNode512, MaskNode512, sched,
                                VTSrcInfo.info512, DestInfoZ, x86memopZ>,
            avx512_trunc_mr_lowering<VTSrcInfo.info512, DestInfoZ,
                                     truncFrag, mtruncFrag, NAME>, EVEX_V512;
}

multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, InVecNode,
                          InVecMaskNode, InVecMaskNode, InVecMaskNode, sched,
                          avx512vl_i64_info, v16i8x_info, v16i8x_info,
                          v16i8x_info, i16mem, i32mem, i64mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<8, CD8VO>;
}

multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
                          InVecMaskNode, InVecMaskNode, MaskNode, sched,
                          avx512vl_i64_info, v8i16x_info, v8i16x_info,
                          v8i16x_info, i32mem, i64mem, i128mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<16, CD8VQ>;
}

multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
                          InVecMaskNode, MaskNode, MaskNode, sched,
                          avx512vl_i64_info, v4i32x_info, v4i32x_info,
                          v8i32x_info, i64mem, i128mem, i256mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<32, CD8VH>;
}

multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
                          InVecMaskNode, InVecMaskNode, MaskNode, sched,
                          avx512vl_i32_info, v16i8x_info, v16i8x_info,
                          v16i8x_info, i32mem, i64mem, i128mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<8, CD8VQ>;
}

multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
                          InVecMaskNode, MaskNode, MaskNode, sched,
                          avx512vl_i32_info, v8i16x_info, v8i16x_info,
                          v16i16x_info, i64mem, i128mem, i256mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<16, CD8VH>;
}

multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
                          InVecMaskNode, MaskNode, MaskNode, sched,
                          avx512vl_i16_info, v16i8x_info, v16i8x_info,
                          v32i8x_info, i64mem, i128mem, i256mem, StoreNode,
                          MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>;
}

defm VPMOVQB : avx512_trunc_qb<0x32, "vpmovqb", trunc, select_trunc,
                               WriteShuffle256, truncstorevi8,
                               masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
defm VPMOVSQB : avx512_trunc_qb<0x22, "vpmovsqb", X86vtruncs, select_truncs,
                                WriteShuffle256, truncstore_s_vi8,
                                masked_truncstore_s_vi8, X86vtruncs,
                                X86vmtruncs>;
defm VPMOVUSQB : avx512_trunc_qb<0x12, "vpmovusqb", X86vtruncus,
                                 select_truncus, WriteShuffle256,
                                 truncstore_us_vi8, masked_truncstore_us_vi8,
                                 X86vtruncus, X86vmtruncus>;

defm VPMOVQW : avx512_trunc_qw<0x34, "vpmovqw", trunc, select_trunc,
                               WriteShuffle256, truncstorevi16,
                               masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
defm VPMOVSQW : avx512_trunc_qw<0x24, "vpmovsqw", X86vtruncs, select_truncs,
                                WriteShuffle256, truncstore_s_vi16,
                                masked_truncstore_s_vi16, X86vtruncs,
                                X86vmtruncs>;
defm VPMOVUSQW : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus,
                                 select_truncus, WriteShuffle256,
                                 truncstore_us_vi16, masked_truncstore_us_vi16,
                                 X86vtruncus, X86vmtruncus>;

defm VPMOVQD : avx512_trunc_qd<0x35, "vpmovqd", trunc, select_trunc,
                               WriteShuffle256, truncstorevi32,
                               masked_truncstorevi32, X86vtrunc, X86vmtrunc>;
defm VPMOVSQD : avx512_trunc_qd<0x25, "vpmovsqd", X86vtruncs, select_truncs,
                                WriteShuffle256, truncstore_s_vi32,
                                masked_truncstore_s_vi32, X86vtruncs,
                                X86vmtruncs>;
defm VPMOVUSQD : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus,
                                 select_truncus, WriteShuffle256,
                                 truncstore_us_vi32, masked_truncstore_us_vi32,
                                 X86vtruncus, X86vmtruncus>;

defm VPMOVDB : avx512_trunc_db<0x31, "vpmovdb", trunc, select_trunc,
                               WriteShuffle256, truncstorevi8,
                               masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
defm VPMOVSDB : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, select_truncs,

defm VPMOVDB : avx512_trunc_db<0x31, "vpmovdb", trunc, select_trunc,
                               WriteShuffle256, truncstorevi8,
                               masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
defm VPMOVSDB : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, select_truncs,
                                WriteShuffle256, truncstore_s_vi8,
                                masked_truncstore_s_vi8, X86vtruncs,
                                X86vmtruncs>;
defm VPMOVUSDB : avx512_trunc_db<0x11, "vpmovusdb", X86vtruncus,
                                 select_truncus, WriteShuffle256,
                                 truncstore_us_vi8, masked_truncstore_us_vi8,
                                 X86vtruncus, X86vmtruncus>;

defm VPMOVDW : avx512_trunc_dw<0x33, "vpmovdw", trunc, select_trunc,
                               WriteShuffle256, truncstorevi16,
                               masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
defm VPMOVSDW : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, select_truncs,
                                WriteShuffle256, truncstore_s_vi16,
                                masked_truncstore_s_vi16, X86vtruncs,
                                X86vmtruncs>;
defm VPMOVUSDW : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus,
                                 select_truncus, WriteShuffle256,
                                 truncstore_us_vi16, masked_truncstore_us_vi16,
                                 X86vtruncus, X86vmtruncus>;

defm VPMOVWB : avx512_trunc_wb<0x30, "vpmovwb", trunc, select_trunc,
                               WriteShuffle256, truncstorevi8,
                               masked_truncstorevi8, X86vtrunc,
                               X86vmtrunc>;
defm VPMOVSWB : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, select_truncs,
                                WriteShuffle256, truncstore_s_vi8,
                                masked_truncstore_s_vi8, X86vtruncs,
                                X86vmtruncs>;
defm VPMOVUSWB : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus,
                                 select_truncus, WriteShuffle256,
                                 truncstore_us_vi8, masked_truncstore_us_vi8,
                                 X86vtruncus, X86vmtruncus>;

let Predicates = [HasAVX512, NoVLX] in {
def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))),
         (v8i16 (EXTRACT_SUBREG
                 (v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                              VR256X:$src, sub_ymm)))), sub_xmm))>;
def: Pat<(v4i32 (trunc (v4i64 VR256X:$src))),
         (v4i32 (EXTRACT_SUBREG
                 (v8i32 (VPMOVQDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                            VR256X:$src, sub_ymm)))), sub_xmm))>;
}

let Predicates = [HasBWI, NoVLX] in {
def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
         (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF),
                                             VR256X:$src, sub_ymm))), sub_xmm))>;
}
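
// For reference, a truncating store selected through avx512_trunc_mr_lowering
// looks roughly like this (illustrative asm only):
//
//   vpmovqb %zmm0, (%rdi) {%k1}   // truncate 8 x i64 to i8 and store 8
//                                 // bytes, under writemask %k1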

// Without BWI we can't use vXi16/vXi8 vselect so we have to use vmtrunc nodes.
multiclass mtrunc_lowering<string InstrName, SDNode OpNode,
                           X86VectorVTInfo DestInfo,
                           X86VectorVTInfo SrcInfo> {
  def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
                                 DestInfo.RC:$src0,
                                 SrcInfo.KRCWM:$mask)),
            (!cast<Instruction>(InstrName#"rrk") DestInfo.RC:$src0,
                                                 SrcInfo.KRCWM:$mask,
                                                 SrcInfo.RC:$src)>;

  def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
                                 DestInfo.ImmAllZerosV,
                                 SrcInfo.KRCWM:$mask)),
            (!cast<Instruction>(InstrName#"rrkz") SrcInfo.KRCWM:$mask,
                                                  SrcInfo.RC:$src)>;
}

let Predicates = [HasVLX] in {
defm : mtrunc_lowering<"VPMOVDWZ256", X86vmtrunc, v8i16x_info, v8i32x_info>;
defm : mtrunc_lowering<"VPMOVSDWZ256", X86vmtruncs, v8i16x_info, v8i32x_info>;
defm : mtrunc_lowering<"VPMOVUSDWZ256", X86vmtruncus, v8i16x_info, v8i32x_info>;
}

let Predicates = [HasAVX512] in {
defm : mtrunc_lowering<"VPMOVDWZ", X86vmtrunc, v16i16x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVSDWZ", X86vmtruncs, v16i16x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVUSDWZ", X86vmtruncus, v16i16x_info, v16i32_info>;

defm : mtrunc_lowering<"VPMOVDBZ", X86vmtrunc, v16i8x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVSDBZ", X86vmtruncs, v16i8x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVUSDBZ", X86vmtruncus, v16i8x_info, v16i32_info>;

defm : mtrunc_lowering<"VPMOVQWZ", X86vmtrunc, v8i16x_info, v8i64_info>;
defm : mtrunc_lowering<"VPMOVSQWZ", X86vmtruncs, v8i16x_info, v8i64_info>;
defm : mtrunc_lowering<"VPMOVUSQWZ", X86vmtruncus, v8i16x_info, v8i64_info>;
}
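
// Sign/zero extension (vpmovsx*/vpmovzx*). As a rough illustration of the
// widest BW form (illustrative asm only):
//
//   vpmovzxbw %ymm0, %zmm1   // zero-extend 32 x i8 to 32 x i16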

multiclass WriteShuffle256_common<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
                                  X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
                                  X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode> {
  let ExeDomain = DestInfo.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
                            (ins SrcInfo.RC:$src), OpcodeStr, "$src", "$src",
                            (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src)))>,
            EVEX, Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
                            (ins x86memop:$src), OpcodeStr, "$src", "$src",
                            (DestInfo.VT (LdFrag addr:$src))>,
            EVEX, Sched<[sched.Folded]>;
  }
}

multiclass WriteShuffle256_BW<bits<8> opc, string OpcodeStr,
                              SDNode OpNode, SDNode InVecNode, string ExtTy,
                              X86FoldableSchedWrite sched,
                              PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasBWI] in {
    defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v8i16x_info,
                                      v16i8x_info, i64mem, LdFrag, InVecNode>,
               EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v16i16x_info,
                                      v16i8x_info, i128mem, LdFrag, OpNode>,
               EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasBWI] in {
    defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v32i16_info,
                                    v32i8x_info, i256mem, LdFrag, OpNode>,
             EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
  }
}

multiclass WriteShuffle256_BD<bits<8> opc, string OpcodeStr,
                              SDNode OpNode, SDNode InVecNode, string ExtTy,
                              X86FoldableSchedWrite sched,
                              PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v4i32x_info,
                                      v16i8x_info, i32mem, LdFrag, InVecNode>,
               EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info,
                                      v16i8x_info, i64mem, LdFrag, InVecNode>,
               EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v16i32_info,
                                    v16i8x_info, i128mem, LdFrag, OpNode>,
             EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
  }
}

multiclass WriteShuffle256_BQ<bits<8> opc, string OpcodeStr,
                              SDNode OpNode, SDNode InVecNode, string ExtTy,
                              X86FoldableSchedWrite sched,
                              PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
                                      v16i8x_info, i16mem, LdFrag, InVecNode>,
               EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
                                      v16i8x_info, i32mem, LdFrag, InVecNode>,
               EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
                                    v16i8x_info, i64mem, LdFrag, InVecNode>,
             EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512, VEX_WIG;
  }
}

multiclass WriteShuffle256_WD<bits<8> opc, string OpcodeStr,
                              SDNode OpNode, SDNode InVecNode, string ExtTy,
                              X86FoldableSchedWrite sched,
                              PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v4i32x_info,
                                      v8i16x_info, i64mem, LdFrag, InVecNode>,
               EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info,
                                      v8i16x_info, i128mem, LdFrag, OpNode>,
               EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v16i32_info,
                                    v16i16x_info, i256mem, LdFrag, OpNode>,
             EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
  }
}

multiclass WriteShuffle256_WQ<bits<8> opc, string OpcodeStr,
                              SDNode OpNode, SDNode InVecNode, string ExtTy,
                              X86FoldableSchedWrite sched,
                              PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
                                      v8i16x_info, i32mem, LdFrag, InVecNode>,
               EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
                                      v8i16x_info, i64mem, LdFrag, InVecNode>,
               EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
                                    v8i16x_info, i128mem, LdFrag, OpNode>,
             EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
  }
}

multiclass WriteShuffle256_DQ<bits<8> opc, string OpcodeStr,
                              SDNode OpNode, SDNode InVecNode, string ExtTy,
                              X86FoldableSchedWrite sched,
                              PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {

  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
                                      v4i32x_info, i64mem, LdFrag, InVecNode>,
               EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128;

    defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
                                      v4i32x_info, i128mem, LdFrag, OpNode>,
               EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256;
  }
  let Predicates = [HasAVX512] in {
    defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
                                    v8i32x_info, i256mem, LdFrag, OpNode>,
             EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512;
  }
}

defm VPMOVZXBW : WriteShuffle256_BW<0x30, "vpmovzxbw", zext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXBD : WriteShuffle256_BD<0x31, "vpmovzxbd", zext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXBQ : WriteShuffle256_BQ<0x32, "vpmovzxbq", zext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXWD : WriteShuffle256_WD<0x33, "vpmovzxwd", zext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXWQ : WriteShuffle256_WQ<0x34, "vpmovzxwq", zext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXDQ : WriteShuffle256_DQ<0x35, "vpmovzxdq", zext, zext_invec, "z", WriteShuffle256>;

defm VPMOVSXBW: WriteShuffle256_BW<0x20, "vpmovsxbw", sext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXBD: WriteShuffle256_BD<0x21, "vpmovsxbd", sext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXBQ: WriteShuffle256_BQ<0x22, "vpmovsxbq", sext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXWD: WriteShuffle256_WD<0x23, "vpmovsxwd", sext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXWQ: WriteShuffle256_WQ<0x24, "vpmovsxwq", sext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXDQ: WriteShuffle256_DQ<0x25, "vpmovsxdq", sext, sext_invec, "s", WriteShuffle256>;
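
// Note that the narrower forms above are instantiated with the *_invec
// nodes: only the low subvector of the source participates. E.g. the 128-bit
// BQ form, roughly:
//
//   vpmovzxbq %xmm0, %xmm1   // reads just the low 2 bytes of %xmm0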

// Patterns for which we also need any-extend versions. aext_vector_inreg
// is currently legalized to zext_vector_inreg.
multiclass AVX512_pmovx_patterns_base<string OpcPrefix, SDNode ExtOp> {
  // 256-bit patterns
  let Predicates = [HasVLX, HasBWI] in {
  def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))),
            (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
  }

  let Predicates = [HasVLX] in {
  def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))),
            (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;

  def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
            (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
  }

  // 512-bit patterns
  let Predicates = [HasBWI] in {
  def : Pat<(v32i16 (ExtOp (loadv32i8 addr:$src))),
            (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
  }
  let Predicates = [HasAVX512] in {
  def : Pat<(v16i32 (ExtOp (loadv16i8 addr:$src))),
            (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;
  def : Pat<(v16i32 (ExtOp (loadv16i16 addr:$src))),
            (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;

  def : Pat<(v8i64 (ExtOp (loadv8i16 addr:$src))),
            (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;

  def : Pat<(v8i64 (ExtOp (loadv8i32 addr:$src))),
            (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
  }
}

multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
                                 SDNode InVecOp> :
    AVX512_pmovx_patterns_base<OpcPrefix, ExtOp> {
  // 128-bit patterns
  let Predicates = [HasVLX, HasBWI] in {
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  }
  let Predicates = [HasVLX] in {
  def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;

  def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;

  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;

  def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (X86vzload32 addr:$src))))),
            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;

  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  }
  let Predicates = [HasVLX] in {
  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;

  def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;

  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  }
  // 512-bit patterns
  let Predicates = [HasAVX512] in {
  def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
  }
}

defm : AVX512_pmovx_patterns<"VPMOVSX", sext, sext_invec>;
defm : AVX512_pmovx_patterns<"VPMOVZX", zext, zext_invec>;

// Without BWI we can't do a trunc from v16i16 to v16i8. DAG combine can merge
// ext+trunc aggressively, making it impossible to legalize the DAG to this
// pattern directly.
let Predicates = [HasAVX512, NoBWI] in {
def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
         (VPMOVDBZrr (v16i32 (VPMOVZXWDZrr VR256X:$src)))>;
def: Pat<(v16i8 (trunc (loadv16i16 addr:$src))),
         (VPMOVDBZrr (v16i32 (VPMOVZXWDZrm addr:$src)))>;
}

//===----------------------------------------------------------------------===//
// GATHER - SCATTER Operations

// FIXME: Improve scheduling of gather/scatter instructions.
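// A gather loads one element per set mask bit from base+index*scale and
// clears the corresponding mask bit as each element completes, e.g.
// (illustrative asm only):
//
//   vpgatherdd (%rax,%zmm1,4), %zmm0 {%k1}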
multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         X86MemOperand memop, PatFrag GatherNode,
                         RegisterClass MaskRC = _.KRCWM> {
  let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
      ExeDomain = _.ExeDomain in
  def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, MaskRC:$mask_wb),
                    (ins _.RC:$src1, MaskRC:$mask, memop:$src2),
                    !strconcat(OpcodeStr#_.Suffix,
                               "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
                    [(set _.RC:$dst, MaskRC:$mask_wb,
                          (GatherNode (_.VT _.RC:$src1), MaskRC:$mask,
                                      vectoraddr:$src2))]>, EVEX, EVEX_K,
           EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteLoad]>;
}

multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
                              AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512,
                                       vy512xmem, mgatherv8i32>, EVEX_V512, VEX_W;
  defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info512,
                                       vz512mem, mgatherv8i64>, EVEX_V512, VEX_W;
let Predicates = [HasVLX] in {
  defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
                                          vx256xmem, mgatherv4i32>, EVEX_V256, VEX_W;
  defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info256,
                                          vy256xmem, mgatherv4i64>, EVEX_V256, VEX_W;
  defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
                                          vx128xmem, mgatherv4i32>, EVEX_V128, VEX_W;
  defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
                                          vx128xmem, mgatherv2i64>, EVEX_V128, VEX_W;
}
}

multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
                              AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512, vz512mem,
                                       mgatherv16i32>, EVEX_V512;
  defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info256, vz256mem,
                                       mgatherv8i64>, EVEX_V512;
let Predicates = [HasVLX] in {
  defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
                                          vy256xmem, mgatherv8i32>, EVEX_V256;
  defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info128,
                                          vy128xmem, mgatherv4i64>, EVEX_V256;
  defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
                                          vx128xmem, mgatherv4i32>, EVEX_V128;
  defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
                                          vx64xmem, mgatherv2i64, VK2WM>,
                            EVEX_V128;
}
}

defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">,
               avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">;

defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">,
                avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;
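
// Note the gather constraint string above: $src1 is tied to $dst (the
// pass-through value) and the updated mask comes back in $mask_wb;
// @earlyclobber keeps $dst from being allocated to the vector index register.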

multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                          X86MemOperand memop, PatFrag ScatterNode,
                          RegisterClass MaskRC = _.KRCWM> {

let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain in

  def mr : AVX5128I<opc, MRMDestMem, (outs MaskRC:$mask_wb),
                    (ins memop:$dst, MaskRC:$mask, _.RC:$src),
                    !strconcat(OpcodeStr#_.Suffix,
                               "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
                    [(set MaskRC:$mask_wb, (ScatterNode (_.VT _.RC:$src),
                                                        MaskRC:$mask, vectoraddr:$dst))]>,
           EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
           Sched<[WriteStore]>;
}

multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
                               AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512,
                                        vy512xmem, mscatterv8i32>, EVEX_V512, VEX_W;
  defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info512,
                                        vz512mem, mscatterv8i64>, EVEX_V512, VEX_W;
let Predicates = [HasVLX] in {
  defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
                                           vx256xmem, mscatterv4i32>, EVEX_V256, VEX_W;
  defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info256,
                                           vy256xmem, mscatterv4i64>, EVEX_V256, VEX_W;
  defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
                                           vx128xmem, mscatterv4i32>, EVEX_V128, VEX_W;
  defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
                                           vx128xmem, mscatterv2i64>, EVEX_V128, VEX_W;
}
}

multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
                               AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512, vz512mem,
                                        mscatterv16i32>, EVEX_V512;
  defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info256, vz256mem,
                                        mscatterv8i64>, EVEX_V512;
let Predicates = [HasVLX] in {
  defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
                                           vy256xmem, mscatterv8i32>, EVEX_V256;
  defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
                                           vy128xmem, mscatterv4i64>, EVEX_V256;
  defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
                                           vx128xmem, mscatterv4i32>, EVEX_V128;
  defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
                                           vx64xmem, mscatterv2i64, VK2WM>,
                            EVEX_V128;
}
}

defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">,
                avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">;

defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">,
                 avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">;
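
// A scatter is the store-side mirror of a gather: one element is stored per
// set mask bit, and mask bits are cleared as elements retire, e.g.
// (illustrative asm only):
//
//   vpscatterdd %zmm0, (%rax,%zmm1,4) {%k1}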

// Gather/scatter prefetch instructions.
multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
                                          RegisterClass KRC, X86MemOperand memop> {
  let Predicates = [HasPFI], mayLoad = 1, mayStore = 1 in
  def m : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
                   !strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"), []>,
          EVEX, EVEX_K, Sched<[WriteLoad]>;
}

defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
                    VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
                    VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
                    VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
                    VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
                    VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
                    VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
                    VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
                    VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
                     VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
                     VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
                     VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
                     VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr> {
def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
                    !strconcat(OpcodeStr##Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
                    [(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))]>,
         EVEX, Sched<[WriteMove]>; // TODO - WriteVecTrunc?

// Also need a pattern for any-extend.
def : Pat<(Vec.VT (anyext Vec.KRC:$src)),
          (!cast<Instruction>(NAME#"rr") Vec.KRC:$src)>;
}

multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
                                 string OpcodeStr, Predicate prd> {
let Predicates = [prd] in
  defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr>, EVEX_V256;
    defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr>, EVEX_V128;
  }
}

defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2", HasBWI>;
defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI>, VEX_W;
defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>;
defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI>, VEX_W;
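
// vpmovm2* materializes each mask bit as an all-ones/all-zeros element
// (a sign-extend of i1), e.g. (illustrative asm only):
//
//   vpmovm2b %k0, %zmm0   // zmm0 byte i = k0 bit i ? 0xFF : 0x00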

multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr> {
  def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                      [(set _.KRC:$dst, (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src)))]>,
           EVEX, Sched<[WriteMove]>;
}

// Use the 512-bit version to implement the 128/256-bit forms when VLX is
// not available.
multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo,
                                           X86VectorVTInfo _,
                                           string Name> {

  def : Pat<(_.KVT (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src))),
            (_.KVT (COPY_TO_REGCLASS
                    (!cast<Instruction>(Name#"Zrr")
                     (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                                    _.RC:$src, _.SubRegIdx)),
                    _.KRC))>;
}

multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
                                         AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
    defm Z : convert_vector_to_mask_common<opc, VTInfo.info512, OpcodeStr>,
             EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256, OpcodeStr>,
                EVEX_V256;
    defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,
                EVEX_V128;
  }
  let Predicates = [prd, NoVLX] in {
    defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info256, NAME>;
    defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info128, NAME>;
  }
}

defm VPMOVB2M : avx512_convert_vector_to_mask<0x29, "vpmovb2m",
                                              avx512vl_i8_info, HasBWI>;
defm VPMOVW2M : avx512_convert_vector_to_mask<0x29, "vpmovw2m",
                                              avx512vl_i16_info, HasBWI>, VEX_W;
defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m",
                                              avx512vl_i32_info, HasDQI>;
defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
                                              avx512vl_i64_info, HasDQI>, VEX_W;

// Patterns for handling sext from a mask register to v16i8/v16i16 when DQI
// is available, but BWI is not. We can't handle this in lowering because
// a target independent DAG combine likes to combine sext and trunc.
let Predicates = [HasDQI, NoBWI] in {
  def : Pat<(v16i8 (sext (v16i1 VK16:$src))),
            (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
  def : Pat<(v16i16 (sext (v16i1 VK16:$src))),
            (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;

  def : Pat<(v16i8 (anyext (v16i1 VK16:$src))),
            (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
  def : Pat<(v16i16 (anyext (v16i1 VK16:$src))),
            (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
}

let Predicates = [HasDQI, NoBWI, HasVLX] in {
  def : Pat<(v8i16 (sext (v8i1 VK8:$src))),
            (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>;

  def : Pat<(v8i16 (anyext (v8i1 VK8:$src))),
            (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - COMPRESS and EXPAND
//

multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
                                        string OpcodeStr, X86FoldableSchedWrite sched> {
  defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
                            (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
                            (null_frag)>, AVX5128IBase,
            Sched<[sched]>;

  let mayStore = 1, hasSideEffects = 0 in
  def mr : AVX5128I<opc, MRMDestMem, (outs),
                    (ins _.MemOp:$dst, _.RC:$src),
                    OpcodeStr # "\t{$src, $dst|$dst, $src}",
                    []>, EVEX_CD8<_.EltSize, CD8VT1>,
           Sched<[sched.Folded]>;

  def mrk : AVX5128I<opc, MRMDestMem, (outs),
                     (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
                     OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                     []>,
            EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
            Sched<[sched.Folded]>;
}

multiclass compress_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
  def : Pat<(X86mCompressingStore (_.VT _.RC:$src), addr:$dst, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix##mrk)
                               addr:$dst, _.KRCWM:$mask, _.RC:$src)>;

  def : Pat<(X86compress (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix##rrk)
                               _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
  def : Pat<(X86compress (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix##rrkz)
                               _.KRCWM:$mask, _.RC:$src)>;
}

multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched,
                                 AVX512VLVectorVTInfo VTInfo,
                                 Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in
  defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr, sched>,
           compress_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [Pred, HasVLX] in {
    defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr, sched>,
                compress_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr, sched>,
                compress_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
  }
}

// FIXME: Is there a better scheduler class for VPCOMPRESS?
defm VPCOMPRESSD : compress_by_elt_width<0x8B, "vpcompressd", WriteVarShuffle256,
                                         avx512vl_i32_info>, EVEX, NotMemoryFoldable;
defm VPCOMPRESSQ : compress_by_elt_width<0x8B, "vpcompressq", WriteVarShuffle256,
                                         avx512vl_i64_info>, EVEX, VEX_W, NotMemoryFoldable;
defm VCOMPRESSPS : compress_by_elt_width<0x8A, "vcompressps", WriteVarShuffle256,
                                         avx512vl_f32_info>, EVEX, NotMemoryFoldable;
defm VCOMPRESSPD : compress_by_elt_width<0x8A, "vcompresspd", WriteVarShuffle256,
                                         avx512vl_f64_info>, EVEX, VEX_W, NotMemoryFoldable;
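
// Compress packs the elements selected by the mask into the low positions,
// e.g. (illustrative asm only):
//
//   vpcompressd %zmm0, %zmm1 {%k1}   // selected dwords of zmm0 land
//                                    // contiguously in the low elements of
//                                    // zmm1; unselected positions merge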

// Expand
multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
                               string OpcodeStr, X86FoldableSchedWrite sched> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
                            (null_frag)>, AVX5128IBase,
            Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                            (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1",
                            (null_frag)>,
            AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>,
            Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass expand_by_vec_width_lowering<X86VectorVTInfo _, string Name> {

  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)),
            (!cast<Instruction>(Name#_.ZSuffix##rmkz)
                               _.KRCWM:$mask, addr:$src)>;

  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix##rmkz)
                               _.KRCWM:$mask, addr:$src)>;

  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask,
                                     (_.VT _.RC:$src0))),
            (!cast<Instruction>(Name#_.ZSuffix##rmk)
                               _.RC:$src0, _.KRCWM:$mask, addr:$src)>;

  def : Pat<(X86expand (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix##rrk)
                               _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
  def : Pat<(X86expand (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix##rrkz)
                               _.KRCWM:$mask, _.RC:$src)>;
}

multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
                               X86FoldableSchedWrite sched,
                               AVX512VLVectorVTInfo VTInfo,
                               Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in
  defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr, sched>,
           expand_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [Pred, HasVLX] in {
    defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr, sched>,
                expand_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr, sched>,
                expand_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
  }
}

// FIXME: Is there a better scheduler class for VPEXPAND?
defm VPEXPANDD : expand_by_elt_width<0x89, "vpexpandd", WriteVarShuffle256,
                                     avx512vl_i32_info>, EVEX;
defm VPEXPANDQ : expand_by_elt_width<0x89, "vpexpandq", WriteVarShuffle256,
                                     avx512vl_i64_info>, EVEX, VEX_W;
defm VEXPANDPS : expand_by_elt_width<0x88, "vexpandps", WriteVarShuffle256,
                                     avx512vl_f32_info>, EVEX;
defm VEXPANDPD : expand_by_elt_width<0x88, "vexpandpd", WriteVarShuffle256,
                                     avx512vl_f64_info>, EVEX, VEX_W;
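
// Expand is the inverse of compress: contiguous source elements are
// distributed to the positions selected by the mask, e.g. (illustrative asm
// only):
//
//   vexpandps (%rdi), %zmm0 {%k1} {z}   // load one f32 per set bit of %k1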

// Handle instruction reg_vec1 = op(reg_vec, imm)
//                              op(mem_vec, imm)
//                              op(broadcast(eltVt), imm)
// All instructions are created with FROUND_CURRENT.
multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                      X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                             (ins _.RC:$src1, i32u8imm:$src2),
                             OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
                             (OpNode (_.VT _.RC:$src1),
                                     (i32 timm:$src2))>, Sched<[sched]>;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                             (ins _.MemOp:$src1, i32u8imm:$src2),
                             OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
                             (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
                                     (i32 timm:$src2))>,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                              (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
                              OpcodeStr##_.Suffix, "$src2, ${src1}"##_.BroadcastStr,
                              "${src1}"##_.BroadcastStr##", $src2",
                              (OpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
                                      (i32 timm:$src2))>, EVEX_B,
               Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Handle instruction reg_vec1 = op(reg_vec2, imm), {sae}
multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
                                          SDNode OpNode, X86FoldableSchedWrite sched,
                                          X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                              (ins _.RC:$src1, i32u8imm:$src2),
                              OpcodeStr##_.Suffix, "$src2, {sae}, $src1",
                              "$src1, {sae}, $src2",
                              (OpNode (_.VT _.RC:$src1),
                                      (i32 timm:$src2))>,
               EVEX_B, Sched<[sched]>;
}

multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
                    AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
                    SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM,
                                        _.info512>,
             avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE,
                                            sched.ZMM, _.info512>, EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM,
                                           _.info128>, EVEX_V128;
    defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM,
                                           _.info256>, EVEX_V256;
  }
}

// Handle instruction reg_vec1 = op(reg_vec2, reg_vec3, imm)
//                              op(reg_vec2, mem_vec, imm)
//                              op(reg_vec2, broadcast(eltVt), imm)
// All instructions are created with FROUND_CURRENT.
multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                             (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                             OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                             (OpNode (_.VT _.RC:$src1),
                                     (_.VT _.RC:$src2),
                                     (i32 timm:$src3))>,
              Sched<[sched]>;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                             (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
                             OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                             (OpNode (_.VT _.RC:$src1),
                                     (_.VT (bitconvert (_.LdFrag addr:$src2))),
                                     (i32 timm:$src3))>,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                              (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
                              OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
                              "$src1, ${src2}"##_.BroadcastStr##", $src3",
                              (OpNode (_.VT _.RC:$src1),
                                      (_.VT (_.BroadcastLdFrag addr:$src2)),
                                      (i32 timm:$src3))>, EVEX_B,
               Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Handle instruction reg_vec1 = op(reg_vec2, reg_vec3, imm)
//                              op(reg_vec2, mem_vec, imm)
multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo DestInfo,
                              X86VectorVTInfo SrcInfo> {
  let ExeDomain = DestInfo.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
                             (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
                             OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                             (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
                                                  (SrcInfo.VT SrcInfo.RC:$src2),
                                                  (i8 timm:$src3)))>,
              Sched<[sched]>;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
                             (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
                             OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                             (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
                                                  (SrcInfo.VT (bitconvert
                                                               (SrcInfo.LdFrag addr:$src2))),
                                                  (i8 timm:$src3)))>,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Handle instruction reg_vec1 = op(reg_vec2, reg_vec3, imm)
//                              op(reg_vec2, mem_vec, imm)
//                              op(reg_vec2, broadcast(eltVt), imm)
multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, X86VectorVTInfo _> :
    avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, sched, _, _> {

  let ExeDomain = _.ExeDomain in
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                              (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                              OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
                              "$src1, ${src2}"##_.BroadcastStr##", $src3",
                              (OpNode (_.VT _.RC:$src1),
                                      (_.VT (_.BroadcastLdFrag addr:$src2)),
                                      (i8 timm:$src3))>, EVEX_B,
               Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// Handle scalar instruction reg_vec1 = op(reg_vec2, reg_vec3, imm)
//                                     op(reg_vec2, mem_scalar, imm)
multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                    (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                                    OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                                    (OpNode (_.VT _.RC:$src1),
                                            (_.VT _.RC:$src2),
                                            (i32 timm:$src3))>,
              Sched<[sched]>;
  defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                                    (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
                                    OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                                    (OpNode (_.VT _.RC:$src1),
                                            (_.VT _.ScalarIntMemCPat:$src2),
                                            (i32 timm:$src3))>,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Handle instruction reg_vec1 = op(reg_vec2, reg_vec3, imm), {sae}
multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode, X86FoldableSchedWrite sched,
                                    X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                              (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                              OpcodeStr, "$src3, {sae}, $src2, $src1",
                              "$src1, $src2, {sae}, $src3",
                              (OpNode (_.VT _.RC:$src1),
                                      (_.VT _.RC:$src2),
                                      (i32 timm:$src3))>,
               EVEX_B, Sched<[sched]>;
}

// Handle scalar instruction reg_vec1 = op(reg_vec2, reg_vec3, imm), {sae}
multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                    X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
  defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                          (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                                          OpcodeStr, "$src3, {sae}, $src2, $src1",
                                          "$src1, $src2, {sae}, $src3",
                                          (OpNode (_.VT _.RC:$src1),
                                                  (_.VT _.RC:$src2),
                                                  (i32 timm:$src3))>,
                   EVEX_B, Sched<[sched]>;
}

multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
                    AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
                    SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
             avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE, sched.ZMM, _.info512>,
             EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
                EVEX_V128;
    defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
                EVEX_V256;
  }
}

multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
                    X86SchedWriteWidths sched, AVX512VLVectorVTInfo DestInfo,
                    AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> {
  let Predicates = [Pred] in {
    defm Z : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.ZMM, DestInfo.info512,
                                SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX_4V;
  }
  let Predicates = [Pred, HasVLX] in {
    defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.XMM, DestInfo.info128,
                                   SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX_4V;
    defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.YMM, DestInfo.info256,
                                   SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX_4V;
  }
}

multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
                    bits<8> opc, SDNode OpNode, X86SchedWriteWidths sched,
                    Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in {
    defm Z : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
             EVEX_V512;
  }
  let Predicates = [Pred, HasVLX] in {
    defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
                EVEX_V128;
    defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
                EVEX_V256;
  }
}

multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
                    X86VectorVTInfo _, bits<8> opc, SDNode OpNode,
                    SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, sched.XMM, _>,
             avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeSAE, sched.XMM, _>;
  }
}

multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
                    bits<8> opcPs, bits<8> opcPd, SDNode OpNode,
                    SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd> {
  defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info,
                                                  opcPs, OpNode, OpNodeSAE, sched, prd>,
            EVEX_CD8<32, CD8VF>;
  defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info,
                                                  opcPd, OpNode, OpNodeSAE, sched, prd>,
            EVEX_CD8<64, CD8VF>, VEX_W;
}

defm VREDUCE : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
                            X86VReduce, X86VReduceSAE, SchedWriteFRnd, HasDQI>,
               AVX512AIi8Base, EVEX;
defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
                            X86any_VRndScale, X86VRndScaleSAE, SchedWriteFRnd, HasAVX512>,
                 AVX512AIi8Base, EVEX;
defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
                            X86VGetMant, X86VGetMantSAE, SchedWriteFRnd, HasAVX512>,
                AVX512AIi8Base, EVEX;

defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
                                                0x50, X86VRange, X86VRangeSAE,
                                                SchedWriteFAdd, HasDQI>,
                AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
                                                0x50, X86VRange, X86VRangeSAE,
                                                SchedWriteFAdd, HasDQI>,
                AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;

defm VRANGESD : avx512_common_fp_sae_scalar_imm<"vrangesd",
                    f64x_info, 0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
                AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VRANGESS : avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
                    0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
                AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;

defm VREDUCESD : avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
                    0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
                 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VREDUCESS : avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
                    0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
                 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;

defm VGETMANTSD : avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
                    0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
                  AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VGETMANTSS : avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
                    0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
                  AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
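
// For the immediate-controlled rounding ops above, the low immediate bits
// select the rounding mode; roughly (illustrative asm only):
//
//   vrndscaleps $1, %zmm0, %zmm1   // imm[1:0]=1: round each f32 toward
//                                  // -inf; imm[7:4]=0 fraction bits kept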

multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
                                          X86FoldableSchedWrite sched,
                                          X86VectorVTInfo _,
                                          X86VectorVTInfo CastInfo,
                                          string EVEX2VEXOvrd> {
  let ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                             (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
                             OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                             (_.VT (bitconvert
                                    (CastInfo.VT (X86Shuf128 _.RC:$src1, _.RC:$src2,
                                                             (i8 timm:$src3)))))>,
              Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                             (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
                             OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                             (_.VT
                              (bitconvert
                               (CastInfo.VT (X86Shuf128 _.RC:$src1,
                                                        (CastInfo.LdFrag addr:$src2),
                                                        (i8 timm:$src3)))))>,
              Sched<[sched.Folded, sched.ReadAfterFold]>,
              EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                              (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                              OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
                              "$src1, ${src2}"##_.BroadcastStr##", $src3",
                              (_.VT
                               (bitconvert
                                (CastInfo.VT
                                 (X86Shuf128 _.RC:$src1,
                                             (_.BroadcastLdFrag addr:$src2),
                                             (i8 timm:$src3)))))>, EVEX_B,
               Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_shuff_packed_128<string OpcodeStr, X86FoldableSchedWrite sched,
                                   AVX512VLVectorVTInfo _,
                                   AVX512VLVectorVTInfo CastInfo, bits<8> opc,
                                   string EVEX2VEXOvrd> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
                                          _.info512, CastInfo.info512, "">, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in
  defm Z256 : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
                                             _.info256, CastInfo.info256,
                                             EVEX2VEXOvrd>, EVEX_V256;
}

defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", WriteFShuffle256,
                      avx512vl_f32_info, avx512vl_f64_info, 0x23, "VPERM2F128">,
                  AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", WriteFShuffle256,
                      avx512vl_f64_info, avx512vl_f64_info, 0x23, "VPERM2F128">,
                  AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", WriteFShuffle256,
                      avx512vl_i32_info, avx512vl_i64_info, 0x43, "VPERM2I128">,
                  AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", WriteFShuffle256,
                      avx512vl_i64_info, avx512vl_i64_info, 0x43, "VPERM2I128">,
                  AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;

let Predicates = [HasAVX512] in {
// Provide a fallback in case the load node that is used in the broadcast
// patterns above is used by additional users, which prevents the pattern
// selection.
def : Pat<(v8f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
          (VSHUFF64X2Zrri (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;
def : Pat<(v8i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
          (VSHUFI64X2Zrri (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;

def : Pat<(v16f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
          (VSHUFF32X4Zrri (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;
def : Pat<(v16i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
          (VSHUFI32X4Zrri (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;

def : Pat<(v32i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
          (VSHUFI32X4Zrri (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;

def : Pat<(v64i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
          (VSHUFI32X4Zrri (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;
}
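
// With a zero immediate every 128-bit lane selector picks lane 0, so a
// vshuff64x2/vshufi32x4 of a register with itself broadcasts its low 128
// bits across the vector (a rough sketch of the fallback above).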

multiclass avx512_valign<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  // NOTE: EVEX2VEXOverride changed back to Unset for 256-bit at the
  // instantiation of this class.
  let ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                             (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
                             OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                             (_.VT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 timm:$src3)))>,
              Sched<[sched]>, EVEX2VEXOverride<"VPALIGNRrri">;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                             (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
                             OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                             (_.VT (X86VAlign _.RC:$src1,
                                              (bitconvert (_.LdFrag addr:$src2)),
                                              (i8 timm:$src3)))>,
              Sched<[sched.Folded, sched.ReadAfterFold]>,
              EVEX2VEXOverride<"VPALIGNRrmi">;

  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                              (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                              OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
                              "$src1, ${src2}"##_.BroadcastStr##", $src3",
                              (X86VAlign _.RC:$src1,
                                         (_.VT (_.BroadcastLdFrag addr:$src2)),
                                         (i8 timm:$src3))>, EVEX_B,
               Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_valign_common<string OpcodeStr, X86SchedWriteWidths sched,
                                AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_valign<0x03, OpcodeStr, sched.ZMM, _.info512>,
             AVX512AIi8Base, EVEX_4V, EVEX_V512;
  }
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_valign<0x03, OpcodeStr, sched.XMM, _.info128>,
                AVX512AIi8Base, EVEX_4V, EVEX_V128;
    // We can't really override the 256-bit version so change it back to unset.
    let EVEX2VEXOverride = ? in
    defm Z256 : avx512_valign<0x03, OpcodeStr, sched.YMM, _.info256>,
                AVX512AIi8Base, EVEX_4V, EVEX_V256;
  }
}

defm VALIGND : avx512_valign_common<"valignd", SchedWriteShuffle,
                                    avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VALIGNQ : avx512_valign_common<"valignq", SchedWriteShuffle,
                                    avx512vl_i64_info>, EVEX_CD8<64, CD8VF>,
               VEX_W;

defm VPALIGNR : avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr",
                                          SchedWriteShuffle, avx512vl_i8_info,
                                          avx512vl_i8_info>, EVEX_CD8<8, CD8VF>;
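
// valignd/valignq shift the two-source concatenation right by the immediate
// measured in elements, so rewriting between granularities only requires
// scaling the immediate: e.g. a masked valignq with imm 1 behaves like a
// masked valignd with imm 2 on the same bits (hence the XForms below).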
multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
                                        X86VectorVTInfo From, X86VectorVTInfo To,
                                        SDNodeXForm ImmXForm> {
  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                            (bitconvert
                             (From.VT (OpNode From.RC:$src1, From.RC:$src2,
                                              timm:$src3))),
                            To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask,
                                                  To.RC:$src1, To.RC:$src2,
                                                  (ImmXForm timm:$src3))>;

  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                            (bitconvert
                             (From.VT (OpNode From.RC:$src1, From.RC:$src2,
                                              timm:$src3))),
                            To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask,
                                                   To.RC:$src1, To.RC:$src2,
                                                   (ImmXForm timm:$src3))>;

  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                            (bitconvert
                             (From.VT (OpNode From.RC:$src1,
                                              (From.LdFrag addr:$src2),
                                              timm:$src3))),
                            To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask,
                                                  To.RC:$src1, addr:$src2,
                                                  (ImmXForm timm:$src3))>;

  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                            (bitconvert
                             (From.VT (OpNode From.RC:$src1,
                                              (From.LdFrag addr:$src2),
                                              timm:$src3))),
                            To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask,
                                                   To.RC:$src1, addr:$src2,
                                                   (ImmXForm timm:$src3))>;
}

multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
                                           X86VectorVTInfo From,
                                           X86VectorVTInfo To,
                                           SDNodeXForm ImmXForm> :
      avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> {
  def : Pat<(From.VT (OpNode From.RC:$src1,
                      (bitconvert (To.VT (To.BroadcastLdFrag addr:$src2))),
                      timm:$src3)),
            (!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2,
                                                  (ImmXForm timm:$src3))>;

  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                            (bitconvert
                             (From.VT (OpNode From.RC:$src1,
                                       (bitconvert
                                        (To.VT (To.BroadcastLdFrag addr:$src2))),
                                       timm:$src3))),
                            To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask,
                                                   To.RC:$src1, addr:$src2,
                                                   (ImmXForm timm:$src3))>;

  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                            (bitconvert
                             (From.VT (OpNode From.RC:$src1,
                                       (bitconvert
                                        (To.VT (To.BroadcastLdFrag addr:$src2))),
                                       timm:$src3))),
                            To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask,
                                                    To.RC:$src1, addr:$src2,
                                                    (ImmXForm timm:$src3))>;
}

let Predicates = [HasAVX512] in {
  // For 512-bit we lower to the widest element type we can. So we only need
  // to handle converting valignq to valignd.
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ", X86VAlign, v8i64_info,
                                         v16i32_info, ValignqImm32XForm>;
}

let Predicates = [HasVLX] in {
  // For 128-bit we lower to the widest element type we can. So we only need
  // to handle converting valignq to valignd.
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ128", X86VAlign, v2i64x_info,
                                         v4i32x_info, ValignqImm32XForm>;
  // For 256-bit we lower to the widest element type we can. So we only need
  // to handle converting valignq to valignd.
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ256", X86VAlign, v4i64x_info,
                                         v8i32x_info, ValignqImm32XForm>;
}

let Predicates = [HasVLX, HasBWI] in {
  // We can turn 128 and 256 bit VALIGND/VALIGNQ into VPALIGNR.
  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v2i64x_info,
                                      v16i8x_info, ValignqImm8XForm>;
  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v4i32x_info,
                                      v16i8x_info, ValigndImm8XForm>;
}

defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw",
                SchedWritePSADBW, avx512vl_i16_info, avx512vl_i8_info>,
                EVEX_CD8<8, CD8VF>, NotEVEX2VEXConvertible;

multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                    (ins _.RC:$src1), OpcodeStr,
                    "$src1", "$src1",
                    (_.VT (OpNode (_.VT _.RC:$src1)))>, EVEX, AVX5128IBase,
                    Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.MemOp:$src1), OpcodeStr,
                  "$src1", "$src1",
                  (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1)))))>,
                  EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>,
                  Sched<[sched.Folded]>;
  }
}

multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _> :
           avx512_unary_rm<opc, OpcodeStr, OpNode, sched, _> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.ScalarMemOp:$src1), OpcodeStr,
                   "${src1}"##_.BroadcastStr,
                   "${src1}"##_.BroadcastStr,
                   (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src1))))>,
                   EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
                   Sched<[sched.Folded]>;
}

multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86SchedWriteWidths sched,
                              AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
           EVEX_V512;

  let Predicates = [prd, HasVLX] in {
  defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
              EVEX_V256;
  defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
              EVEX_V128;
  }
}
multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo,
                               Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
           EVEX_V512;

  let Predicates = [prd, HasVLX] in {
  defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
              EVEX_V256;
  defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
              EVEX_V128;
  }
}

multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd> {
  defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, sched,
                               avx512vl_i64_info, prd>, VEX_W;
  defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, sched,
                               avx512vl_i32_info, prd>;
}

multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd> {
  defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, sched,
                              avx512vl_i16_info, prd>, VEX_WIG;
  defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, sched,
                              avx512vl_i8_info, prd>, VEX_WIG;
}

multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
                                  bits<8> opc_d, bits<8> opc_q,
                                  string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched> {
  defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, sched,
                                    HasAVX512>,
              avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, sched,
                                    HasBWI>;
}

defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs,
                                    SchedWriteVecALU>;

// VPABS: Use 512bit version to implement 128/256 bit in case NoVLX.
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v4i64 (abs VR256X:$src)),
            (EXTRACT_SUBREG
                (VPABSQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
             sub_ymm)>;
  def : Pat<(v2i64 (abs VR128X:$src)),
            (EXTRACT_SUBREG
                (VPABSQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
             sub_xmm)>;
}

// Use 512bit version to implement 128/256 bit.
multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
                                 AVX512VLVectorVTInfo _, Predicate prd> {
  let Predicates = [prd, NoVLX] in {
    def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1))),
              (EXTRACT_SUBREG
                (!cast<Instruction>(InstrStr # "Zrr")
                  (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
                                 _.info256.RC:$src1,
                                 _.info256.SubRegIdx)),
                _.info256.SubRegIdx)>;

    def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1))),
              (EXTRACT_SUBREG
                (!cast<Instruction>(InstrStr # "Zrr")
                  (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
                                 _.info128.RC:$src1,
                                 _.info128.SubRegIdx)),
                _.info128.SubRegIdx)>;
  }
}

defm VPLZCNT : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz,
                                     SchedWriteVecIMul, HasCDI>;

// FIXME: Is there a better scheduler class for VPCONFLICT?
defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict,
                                        SchedWriteVecALU, HasCDI>;

// VPLZCNT: Use 512bit version to implement 128/256 bit in case NoVLX.
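// The lowering widens the operand: the narrow vector is inserted into an
// IMPLICIT_DEF 512-bit register, the Z-form instruction runs across all
// lanes, and the original width is extracted again. The upper lanes compute
// garbage that is simply discarded.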
defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>;
defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>;

//===---------------------------------------------------------------------===//
// Counts number of ones - VPOPCNTD and VPOPCNTQ
//===---------------------------------------------------------------------===//

// FIXME: Is there a better scheduler class for VPOPCNTD/VPOPCNTQ?
defm VPOPCNT : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop,
                                     SchedWriteVecALU, HasVPOPCNTDQ>;

defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info, HasVPOPCNTDQ>;
defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info, HasVPOPCNTDQ>;

//===---------------------------------------------------------------------===//
// Replicate Single FP - MOVSHDUP and MOVSLDUP
//===---------------------------------------------------------------------===//

multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86SchedWriteWidths sched> {
  defm NAME: avx512_unary_rm_vl<opc, OpcodeStr, OpNode, sched,
                                avx512vl_f32_info, HasAVX512>, XS;
}

defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup,
                                  SchedWriteFShuffle>;
defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup,
                                  SchedWriteFShuffle>;

//===----------------------------------------------------------------------===//
// AVX-512 - MOVDDUP
//===----------------------------------------------------------------------===//

multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr,
                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins _.RC:$src), OpcodeStr, "$src", "$src",
                   (_.VT (X86VBroadcast (_.VT _.RC:$src)))>, EVEX,
                   Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                 (ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
                 (_.VT (_.BroadcastLdFrag addr:$src))>,
                 EVEX, EVEX_CD8<_.EltSize, CD8VH>,
                 Sched<[sched.Folded]>;
  }
}

multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo> {
  defm Z : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.ZMM,
                           VTInfo.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.YMM,
                                VTInfo.info256>, EVEX_V256;
    defm Z128 : avx512_movddup_128<opc, OpcodeStr, sched.XMM,
                                   VTInfo.info128>, EVEX_V128;
  }
}

multiclass avx512_movddup<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          X86SchedWriteWidths sched> {
  defm NAME: avx512_movddup_common<opc, OpcodeStr, OpNode, sched,
                                   avx512vl_f64_info>, XD, VEX_W;
}

defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup, SchedWriteFShuffle>;

let Predicates = [HasVLX] in {
def : Pat<(v2f64 (X86VBroadcast f64:$src)),
          (VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
def : Pat<(v2f64 (X86VBroadcast (v2f64 (simple_load addr:$src)))),
          (VMOVDDUPZ128rm addr:$src)>;
def : Pat<(v2f64 (X86VBroadcast (v2f64 (X86vzload64 addr:$src)))),
          (VMOVDDUPZ128rm addr:$src)>;
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
                   (v2f64 VR128X:$src0)),
          (VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
                           (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
                   immAllZerosV),
          (VMOVDDUPZ128rrkz VK2WM:$mask, (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;

def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcastld64 addr:$src)),
                   (v2f64 VR128X:$src0)),
          (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcastld64 addr:$src)),
                   immAllZerosV),
          (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;

def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (v2f64 (simple_load addr:$src)))),
                   (v2f64 VR128X:$src0)),
          (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (v2f64 (simple_load addr:$src)))),
                   immAllZerosV),
          (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - Unpack Instructions
//===----------------------------------------------------------------------===//

let Uses = []<Register>, mayRaiseFPException = 0 in {
defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, HasAVX512,
                                 SchedWriteFShuffleSizes, 0, 1>;
defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, HasAVX512,
                                 SchedWriteFShuffleSizes>;
}

defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
                                       SchedWriteShuffle, HasBWI>;
defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh,
                                       SchedWriteShuffle, HasBWI>;
defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl,
                                       SchedWriteShuffle, HasBWI>;
defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh,
                                       SchedWriteShuffle, HasBWI>;

defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl,
                                       SchedWriteShuffle, HasAVX512>;
defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh,
                                       SchedWriteShuffle, HasAVX512>;
defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl,
                                        SchedWriteShuffle, HasAVX512>;
defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh,
                                        SchedWriteShuffle, HasAVX512>;

//===----------------------------------------------------------------------===//
// AVX-512 - Extract & Insert Integer Instructions
//===----------------------------------------------------------------------===//

multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   X86VectorVTInfo _> {
  def mr : AVX512Ii8<opc, MRMDestMem, (outs),
              (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              [(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), imm:$src2))),
                      addr:$dst)]>,
              EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecExtractSt]>;
}

multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst),
                       (ins _.RC:$src1, u8imm:$src2),
                       OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set GR32orGR64:$dst,
                             (X86pextrb (_.VT _.RC:$src1), imm:$src2))]>,
                       EVEX, TAPD, Sched<[WriteVecExtract]>;
    defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD;
  }
}

multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
                       (ins _.RC:$src1, u8imm:$src2),
                       OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set GR32orGR64:$dst,
                             (X86pextrw (_.VT _.RC:$src1), imm:$src2))]>,
                       EVEX, PD, Sched<[WriteVecExtract]>;

    let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
    def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
                           (ins _.RC:$src1, u8imm:$src2),
                           OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
                           EVEX, TAPD, FoldGenData<NAME#rr>,
                           Sched<[WriteVecExtract]>;

    defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD;
  }
}

multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
                                 RegisterClass GRC> {
  let Predicates = [HasDQI] in {
    def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst),
                       (ins _.RC:$src1, u8imm:$src2),
                       OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set GRC:$dst,
                           (extractelt (_.VT _.RC:$src1), imm:$src2))]>,
                       EVEX, TAPD, Sched<[WriteVecExtract]>;

    def mr : AVX512Ii8<0x16, MRMDestMem, (outs),
                       (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
                       OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(store (extractelt (_.VT _.RC:$src1),
                                           imm:$src2),addr:$dst)]>,
                       EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD,
                       Sched<[WriteVecExtractSt]>;
  }
}

defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>, VEX_WIG;
defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>, VEX_WIG;
defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, VEX_W;

multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86VectorVTInfo _, PatFrag LdFrag> {
  def rm : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst),
      (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
      OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set _.RC:$dst,
          (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), imm:$src3)))]>,
      EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
}

multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86VectorVTInfo _, PatFrag LdFrag> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
        (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
        OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
        [(set _.RC:$dst,
            (OpNode _.RC:$src1, GR32orGR64:$src2, imm:$src3))]>, EVEX_4V,
        Sched<[WriteVecInsert]>;

    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag>;
  }
}

multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
                                X86VectorVTInfo _, RegisterClass GRC> {
  let Predicates = [HasDQI] in {
    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
        (ins _.RC:$src1, GRC:$src2, u8imm:$src3),
        OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
        [(set _.RC:$dst,
            (_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>,
        EVEX_4V, TAPD, Sched<[WriteVecInsert]>;
    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
                                    _.ScalarLdFrag>, TAPD;
  }
}

defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info,
                                     extloadi8>, TAPD, VEX_WIG;
defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info,
                                     extloadi16>, PD, VEX_WIG;
defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, VEX_W;

//===----------------------------------------------------------------------===//
// VSHUFPS - VSHUFPD Operations
//===----------------------------------------------------------------------===//

multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_I,
                        AVX512VLVectorVTInfo VTInfo_FP>{
  defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp,
                                    SchedWriteFShuffle>,
                                    EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
                                    AVX512AIi8Base, EVEX_4V;
}

defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_i32_info, avx512vl_f32_info>, PS;
defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_i64_info, avx512vl_f64_info>, PD, VEX_W;

//===----------------------------------------------------------------------===//
// AVX-512 - Byte shift Left/Right
//===----------------------------------------------------------------------===//

// FIXME: The SSE/AVX names are PSLLDQri etc. - should we add the i here as well?
multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
                               Format MRMm, string OpcodeStr,
                               X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  def rr : AVX512<opc, MRMr,
             (outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 timm:$src2))))]>,
             Sched<[sched]>;
  def rm : AVX512<opc, MRMm,
           (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
           [(set _.RC:$dst,(_.VT (OpNode
                                  (_.VT (bitconvert (_.LdFrag addr:$src1))),
                                  (i8 timm:$src2))))]>,
           Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
                                   Format MRMm, string OpcodeStr,
                                   X86SchedWriteWidths sched, Predicate prd>{
  let Predicates = [prd] in
    defm Z : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                 sched.ZMM, v64i8_info>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                    sched.YMM, v32i8x_info>, EVEX_V256;
    defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                    sched.XMM, v16i8x_info>, EVEX_V128;
  }
}
defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq",
                                       SchedWriteShuffle, HasBWI>,
                                       AVX512PDIi8Base, EVEX_4V, VEX_WIG;
defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq",
                                       SchedWriteShuffle, HasBWI>,
                                       AVX512PDIi8Base, EVEX_4V, VEX_WIG;
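// vpsadbw produces one sum of absolute byte differences per 64-bit lane,
// zero-extended to a qword, which is why the multiclass below takes separate
// destination (qword) and source (byte) type infos.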
multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
                                string OpcodeStr, X86FoldableSchedWrite sched,
                                X86VectorVTInfo _dst, X86VectorVTInfo _src> {
  let isCommutable = 1 in
  def rr : AVX512BI<opc, MRMSrcReg,
             (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.RC:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _dst.RC:$dst,(_dst.VT
                                 (OpNode (_src.VT _src.RC:$src1),
                                         (_src.VT _src.RC:$src2))))]>,
             Sched<[sched]>;
  def rm : AVX512BI<opc, MRMSrcMem,
             (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.MemOp:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _dst.RC:$dst,(_dst.VT
                                 (OpNode (_src.VT _src.RC:$src1),
                                         (_src.VT (bitconvert
                                                   (_src.LdFrag addr:$src2))))))]>,
             Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
                                    string OpcodeStr, X86SchedWriteWidths sched,
                                    Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.ZMM,
                                  v8i64_info, v64i8_info>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.YMM,
                                     v4i64x_info, v32i8x_info>, EVEX_V256;
    defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.XMM,
                                     v2i64x_info, v16i8x_info>, EVEX_V128;
  }
}

defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
                                        SchedWritePSADBW, HasBWI>, EVEX_4V, VEX_WIG;

// Transforms to swizzle an immediate to enable better matching when the
// memory operand isn't in the right place.
def VPTERNLOG321_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by swapping operand 0 and operand 2.
  uint8_t Imm = N->getZExtValue();
  // Swap bits 1/4 and 3/6.
  uint8_t NewImm = Imm & 0xa5;
  if (Imm & 0x02) NewImm |= 0x10;
  if (Imm & 0x10) NewImm |= 0x02;
  if (Imm & 0x08) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG213_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by swapping operand 0 and operand 1.
  uint8_t Imm = N->getZExtValue();
  // Swap bits 2/4 and 3/5.
  uint8_t NewImm = Imm & 0xc3;
  if (Imm & 0x04) NewImm |= 0x10;
  if (Imm & 0x10) NewImm |= 0x04;
  if (Imm & 0x08) NewImm |= 0x20;
  if (Imm & 0x20) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG132_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2.
  uint8_t Imm = N->getZExtValue();
  // Swap bits 1/2 and 5/6.
  uint8_t NewImm = Imm & 0x99;
  if (Imm & 0x02) NewImm |= 0x04;
  if (Imm & 0x04) NewImm |= 0x02;
  if (Imm & 0x20) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x20;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG231_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by moving operand 1 to the end.
  uint8_t Imm = N->getZExtValue();
  // Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5
  uint8_t NewImm = Imm & 0x81;
  if (Imm & 0x02) NewImm |= 0x04;
  if (Imm & 0x04) NewImm |= 0x10;
  if (Imm & 0x08) NewImm |= 0x40;
  if (Imm & 0x10) NewImm |= 0x02;
  if (Imm & 0x20) NewImm |= 0x08;
  if (Imm & 0x40) NewImm |= 0x20;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG312_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by moving operand 2 to the beginning.
  uint8_t Imm = N->getZExtValue();
  // Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3
  uint8_t NewImm = Imm & 0x81;
  if (Imm & 0x02) NewImm |= 0x10;
  if (Imm & 0x04) NewImm |= 0x02;
  if (Imm & 0x08) NewImm |= 0x20;
  if (Imm & 0x10) NewImm |= 0x04;
  if (Imm & 0x20) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
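// Worked example for the transforms above: bit i of the immediate is the
// function result for inputs (op0, op1, op2) = (i[2], i[1], i[0]). So for
// imm 0xCA, which encodes op0 ? op1 : op2, VPTERNLOG321_imm8 (swap op0 and
// op2) yields 0xD8, encoding op2 ? op1 : op0.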
multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          X86FoldableSchedWrite sched, X86VectorVTInfo _,
                          string Name>{
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
                      OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_.VT _.RC:$src3),
                              (i8 timm:$src4)), 1, 1>,
                      AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
  defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
                      OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_.VT (bitconvert (_.LdFrag addr:$src3))),
                              (i8 timm:$src4)), 1, 0>,
                      AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                      Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
                      OpcodeStr, "$src4, ${src3}"##_.BroadcastStr##", $src2",
                      "$src2, ${src3}"##_.BroadcastStr##", $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_.VT (_.BroadcastLdFrag addr:$src3)),
                              (i8 timm:$src4)), 1, 0>, EVEX_B,
                      AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                      Sched<[sched.Folded, sched.ReadAfterFold]>;
  }// Constraints = "$src1 = $dst"

  // Additional patterns for matching passthru operand in other positions.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 timm:$src4))>;

  // Additional patterns for matching loads in other positions.
  def : Pat<(_.VT (OpNode (bitconvert (_.LdFrag addr:$src3)),
                          _.RC:$src2, _.RC:$src1, (i8 timm:$src4))),
            (!cast<Instruction>(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
                                   addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (OpNode _.RC:$src1,
                          (bitconvert (_.LdFrag addr:$src3)),
                          _.RC:$src2, (i8 timm:$src4))),
            (!cast<Instruction>(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
                                   addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;

  // Additional patterns for matching zero masking with loads in other
  // positions.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, (i8 timm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;

  // Additional patterns for matching masked loads with different
  // operand orders.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1,
                    (bitconvert (_.LdFrag addr:$src3)), (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src1, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;

  // Additional patterns for matching broadcasts in other positions.
  def : Pat<(_.VT (OpNode (_.BroadcastLdFrag addr:$src3),
                          _.RC:$src2, _.RC:$src1, (i8 timm:$src4))),
            (!cast<Instruction>(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
                                   addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (OpNode _.RC:$src1,
                          (_.BroadcastLdFrag addr:$src3),
                          _.RC:$src2, (i8 timm:$src4))),
            (!cast<Instruction>(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
                                   addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;

  // Additional patterns for matching zero masking with broadcasts in other
  // positions.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
             _.KRCWM:$mask, _.RC:$src2, addr:$src3,
             (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src1,
                    (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src2, (i8 timm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
             _.KRCWM:$mask, _.RC:$src2, addr:$src3,
             (VPTERNLOG132_imm8 timm:$src4))>;

  // Additional patterns for matching masked broadcasts with different
  // operand orders.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src1, (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src2, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1,
                    (_.BroadcastLdFrag addr:$src3),
                    (i8 timm:$src4)), _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src2,
                    (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src1, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;
}

multiclass avx512_common_ternlog<string OpcodeStr, X86SchedWriteWidths sched,
                                 AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in
    defm Z : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.ZMM,
                            _.info512, NAME>, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.XMM,
                               _.info128, NAME>, EVEX_V128;
    defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.YMM,
                               _.info256, NAME>, EVEX_V256;
  }
}

defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SchedWriteVecALU,
                                        avx512vl_i32_info>;
defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU,
                                        avx512vl_i64_info>, VEX_W;

// Patterns to use VPTERNLOG for vXi16/vXi8 vectors.
let Predicates = [HasVLX] in {
  def : Pat<(v16i8 (X86vpternlog VR128X:$src1, VR128X:$src2, VR128X:$src3,
                                 (i8 timm:$src4))),
            (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3,
                               timm:$src4)>;
  def : Pat<(v16i8 (X86vpternlog VR128X:$src1, VR128X:$src2,
                                 (loadv16i8 addr:$src3), (i8 timm:$src4))),
            (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
                               timm:$src4)>;
  def : Pat<(v16i8 (X86vpternlog (loadv16i8 addr:$src3), VR128X:$src2,
                                 VR128X:$src1, (i8 timm:$src4))),
            (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
                               (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(v16i8 (X86vpternlog VR128X:$src1, (loadv16i8 addr:$src3),
                                 VR128X:$src2, (i8 timm:$src4))),
            (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
                               (VPTERNLOG132_imm8 timm:$src4))>;

  def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2, VR128X:$src3,
                                 (i8 timm:$src4))),
            (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3,
                               timm:$src4)>;
  def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2,
                                 (loadv8i16 addr:$src3), (i8 timm:$src4))),
            (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
                               timm:$src4)>;
  def : Pat<(v8i16 (X86vpternlog (loadv8i16 addr:$src3), VR128X:$src2,
                                 VR128X:$src1, (i8 timm:$src4))),
            (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
                               (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(v8i16 (X86vpternlog VR128X:$src1, (loadv8i16 addr:$src3),
                                 VR128X:$src2, (i8 timm:$src4))),
            (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
                               (VPTERNLOG132_imm8 timm:$src4))>;

  def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2, VR256X:$src3,
                                 (i8 timm:$src4))),
            (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3,
                               timm:$src4)>;
  def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2,
                                 (loadv32i8 addr:$src3), (i8 timm:$src4))),
            (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
                               timm:$src4)>;
  def : Pat<(v32i8 (X86vpternlog (loadv32i8 addr:$src3), VR256X:$src2,
                                 VR256X:$src1, (i8 timm:$src4))),
            (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
                               (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(v32i8 (X86vpternlog VR256X:$src1, (loadv32i8 addr:$src3),
                                 VR256X:$src2, (i8 timm:$src4))),
            (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
                               (VPTERNLOG132_imm8 timm:$src4))>;

  def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2, VR256X:$src3,
                                  (i8 timm:$src4))),
            (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3,
                               timm:$src4)>;
  def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2,
                                  (loadv16i16 addr:$src3), (i8 timm:$src4))),
            (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
                               timm:$src4)>;
  def : Pat<(v16i16 (X86vpternlog (loadv16i16 addr:$src3), VR256X:$src2,
                                  VR256X:$src1, (i8 timm:$src4))),
            (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
                               (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(v16i16 (X86vpternlog VR256X:$src1, (loadv16i16 addr:$src3),
                                  VR256X:$src2, (i8 timm:$src4))),
            (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
                               (VPTERNLOG132_imm8 timm:$src4))>;
}
let Predicates = [HasAVX512] in {
  def : Pat<(v64i8 (X86vpternlog VR512:$src1, VR512:$src2, VR512:$src3,
                                 (i8 timm:$src4))),
            (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3,
                            timm:$src4)>;
  def : Pat<(v64i8 (X86vpternlog VR512:$src1, VR512:$src2,
                                 (loadv64i8 addr:$src3), (i8 timm:$src4))),
            (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
                            timm:$src4)>;
  def : Pat<(v64i8 (X86vpternlog (loadv64i8 addr:$src3), VR512:$src2,
                                 VR512:$src1, (i8 timm:$src4))),
            (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
                            (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(v64i8 (X86vpternlog VR512:$src1, (loadv64i8 addr:$src3),
                                 VR512:$src2, (i8 timm:$src4))),
            (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
                            (VPTERNLOG132_imm8 timm:$src4))>;

  def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2, VR512:$src3,
                                  (i8 timm:$src4))),
            (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3,
                            timm:$src4)>;
  def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2,
                                  (loadv32i16 addr:$src3), (i8 timm:$src4))),
            (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
                            timm:$src4)>;
  def : Pat<(v32i16 (X86vpternlog (loadv32i16 addr:$src3), VR512:$src2,
                                  VR512:$src1, (i8 timm:$src4))),
            (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
                            (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(v32i16 (X86vpternlog VR512:$src1, (loadv32i16 addr:$src3),
                                  VR512:$src2, (i8 timm:$src4))),
            (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
                            (VPTERNLOG132_imm8 timm:$src4))>;
}

// Patterns to implement vnot using vpternlog instead of creating all ones
// using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen
// so that the result is only dependent on src0. But we use the same source
// for all operands to prevent a false dependency.
// TODO: We should maybe have a more generalized algorithm for folding to
// vpternlog.
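// To see why 15 works: immediate bit i is the result for inputs
// (op0, op1, op2) = (i[2], i[1], i[0]), and 15 = 0b00001111 sets exactly the
// indices with op0 == 0, i.e. the function is ~op0 regardless of the other
// two operands.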
let Predicates = [HasAVX512] in {
  def : Pat<(xor VR512:$src, (v64i8 immAllOnesV)),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
  def : Pat<(xor VR512:$src, (v32i16 immAllOnesV)),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
  def : Pat<(xor VR512:$src, (v16i32 immAllOnesV)),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
  def : Pat<(xor VR512:$src, (v8i64 immAllOnesV)),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
}

let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(xor VR128X:$src, (v16i8 immAllOnesV)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (i8 15)), sub_xmm)>;
  def : Pat<(xor VR128X:$src, (v8i16 immAllOnesV)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (i8 15)), sub_xmm)>;
  def : Pat<(xor VR128X:$src, (v4i32 immAllOnesV)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (i8 15)), sub_xmm)>;
  def : Pat<(xor VR128X:$src, (v2i64 immAllOnesV)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (i8 15)), sub_xmm)>;

  def : Pat<(xor VR256X:$src, (v32i8 immAllOnesV)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (i8 15)), sub_ymm)>;
  def : Pat<(xor VR256X:$src, (v16i16 immAllOnesV)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (i8 15)), sub_ymm)>;
  def : Pat<(xor VR256X:$src, (v8i32 immAllOnesV)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (i8 15)), sub_ymm)>;
  def : Pat<(xor VR256X:$src, (v4i64 immAllOnesV)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (i8 15)), sub_ymm)>;
}

let Predicates = [HasVLX] in {
  def : Pat<(xor VR128X:$src, (v16i8 immAllOnesV)),
            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
  def : Pat<(xor VR128X:$src, (v8i16 immAllOnesV)),
            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
  def : Pat<(xor VR128X:$src, (v4i32 immAllOnesV)),
            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
  def : Pat<(xor VR128X:$src, (v2i64 immAllOnesV)),
            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;

  def : Pat<(xor VR256X:$src, (v32i8 immAllOnesV)),
            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
  def : Pat<(xor VR256X:$src, (v16i16 immAllOnesV)),
            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
  def : Pat<(xor VR256X:$src, (v8i32 immAllOnesV)),
            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
  def : Pat<(xor VR256X:$src, (v4i64 immAllOnesV)),
            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - FixupImm
//===----------------------------------------------------------------------===//

multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                  X86VectorVTInfo TblVT>{
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
      Uses = [MXCSR], mayRaiseFPException = 1 in {
    defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                        OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                        (X86VFixupimm (_.VT _.RC:$src1),
                                      (_.VT _.RC:$src2),
                                      (TblVT.VT _.RC:$src3),
                                      (i32 timm:$src4))>, Sched<[sched]>;
    defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
                      OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (X86VFixupimm (_.VT _.RC:$src1),
                                    (_.VT _.RC:$src2),
                                    (TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))),
                                    (i32 timm:$src4))>,
                      Sched<[sched.Folded, sched.ReadAfterFold]>;
    defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
                      OpcodeStr##_.Suffix, "$src4, ${src3}"##_.BroadcastStr##", $src2",
                      "$src2, ${src3}"##_.BroadcastStr##", $src4",
                      (X86VFixupimm (_.VT _.RC:$src1),
                                    (_.VT _.RC:$src2),
                                    (TblVT.VT (TblVT.BroadcastLdFrag addr:$src3)),
                                    (i32 timm:$src4))>,
                      EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  } // Constraints = "$src1 = $dst"
}

multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
                                      X86FoldableSchedWrite sched,
                                      X86VectorVTInfo _, X86VectorVTInfo TblVT>
  : avx512_fixupimm_packed<opc, OpcodeStr, sched, _, TblVT> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
  defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                      OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
                      "$src2, $src3, {sae}, $src4",
                      (X86VFixupimmSAE (_.VT _.RC:$src1),
                                       (_.VT _.RC:$src2),
                                       (TblVT.VT _.RC:$src3),
                                       (i32 timm:$src4))>,
                      EVEX_B, Sched<[sched]>;
  }
}

multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                  X86VectorVTInfo _src3VT> {
  let Constraints = "$src1 = $dst", Predicates = [HasAVX512],
      ExeDomain = _.ExeDomain in {
    defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                      OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (X86VFixupimms (_.VT _.RC:$src1),
                                     (_.VT _.RC:$src2),
                                     (_src3VT.VT _src3VT.RC:$src3),
                                     (i32 timm:$src4))>, Sched<[sched]>, SIMD_EXC;
    let Uses = [MXCSR] in
    defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                      OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
                      "$src2, $src3, {sae}, $src4",
                      (X86VFixupimmSAEs (_.VT _.RC:$src1),
                                        (_.VT _.RC:$src2),
                                        (_src3VT.VT _src3VT.RC:$src3),
                                        (i32 timm:$src4))>,
                      EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
    defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
                      OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (X86VFixupimms (_.VT _.RC:$src1),
                                     (_.VT _.RC:$src2),
                                     (_src3VT.VT (scalar_to_vector
                                               (_src3VT.ScalarLdFrag addr:$src3))),
                                     (i32 timm:$src4))>,
                      Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  }
}

multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched,
                                      AVX512VLVectorVTInfo _Vec,
                                      AVX512VLVectorVTInfo _Tbl> {
  let Predicates = [HasAVX512] in
    defm Z : avx512_fixupimm_packed_sae<0x54, "vfixupimm", sched.ZMM,
                                _Vec.info512, _Tbl.info512>, AVX512AIi8Base,
                                EVEX_4V, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.XMM,
                            _Vec.info128, _Tbl.info128>, AVX512AIi8Base,
                            EVEX_4V, EVEX_V128;
    defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.YMM,
                            _Vec.info256, _Tbl.info256>, AVX512AIi8Base,
                            EVEX_4V, EVEX_V256;
  }
}

defm VFIXUPIMMSSZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
                                           SchedWriteFAdd.Scl, f32x_info, v4i32x_info>,
                          AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
defm VFIXUPIMMSDZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
                                           SchedWriteFAdd.Scl, f64x_info, v2i64x_info>,
                          AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f32_info,
                         avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f64_info,
                         avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;

// Patterns used to select SSE scalar fp arithmetic instructions from
// either:
//
// (1) a scalar fp operation followed by a blend
//
// The effect is that the backend no longer emits unnecessary vector
// insert instructions immediately after SSE scalar fp instructions
// like addss or mulss.
//
// For example, given the following code:
//   __m128 foo(__m128 A, __m128 B) {
//     A[0] += B[0];
//     return A;
//   }
//
// Previously we generated:
//   addss %xmm0, %xmm1
//   movss %xmm1, %xmm0
//
// We now generate:
//   addss %xmm1, %xmm0
//
// (2) a vector packed single/double fp operation followed by a vector insert
//
// The effect is that the backend converts the packed fp instruction
// followed by a vector insert into a single SSE scalar fp instruction.
//
// For example, given the following code:
//   __m128 foo(__m128 A, __m128 B) {
//     __m128 C = A + B;
//     return (__m128) {C[0], A[1], A[2], A[3]};
//   }
//
// Previously we generated:
//   addps %xmm0, %xmm1
//   movss %xmm1, %xmm0
//
// We now generate:
//   addss %xmm1, %xmm0

// TODO: Some canonicalization in lowering would simplify the number of
// patterns we have to try to match.
multiclass AVX512_scalar_math_fp_patterns<SDNode Op, string OpcPrefix, SDNode MoveNode,
                                          X86VectorVTInfo _, PatLeaf ZeroFP> {
  let Predicates = [HasAVX512] in {
    // extracted scalar math op with insert via movss
    def : Pat<(MoveNode
               (_.VT VR128X:$dst),
               (_.VT (scalar_to_vector
                      (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
                          _.FRC:$src)))),
              (!cast<Instruction>("V"#OpcPrefix#Zrr_Int) _.VT:$dst,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src, VR128X)))>;
    def : Pat<(MoveNode
               (_.VT VR128X:$dst),
               (_.VT (scalar_to_vector
                      (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
                          (_.ScalarLdFrag addr:$src))))),
              (!cast<Instruction>("V"#OpcPrefix#Zrm_Int) _.VT:$dst, addr:$src)>;

    // extracted masked scalar math op with insert via movss
    def : Pat<(MoveNode (_.VT VR128X:$src1),
               (scalar_to_vector
                (X86selects VK1WM:$mask,
                            (Op (_.EltVT
                                 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                _.FRC:$src2),
                            _.FRC:$src0))),
              (!cast<Instruction>("V"#OpcPrefix#Zrr_Intk)
               (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
               VK1WM:$mask, _.VT:$src1,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
    def : Pat<(MoveNode (_.VT VR128X:$src1),
               (scalar_to_vector
                (X86selects VK1WM:$mask,
                            (Op (_.EltVT
                                 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                (_.ScalarLdFrag addr:$src2)),
                            _.FRC:$src0))),
              (!cast<Instruction>("V"#OpcPrefix#Zrm_Intk)
               (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
               VK1WM:$mask, _.VT:$src1, addr:$src2)>;

    // extracted zero-masked scalar math op with insert via movss
    def : Pat<(MoveNode (_.VT VR128X:$src1),
               (scalar_to_vector
                (X86selects VK1WM:$mask,
                            (Op (_.EltVT
                                 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                _.FRC:$src2), (_.EltVT ZeroFP)))),
              (!cast<I>("V"#OpcPrefix#Zrr_Intkz)
               VK1WM:$mask, _.VT:$src1,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
    def : Pat<(MoveNode (_.VT VR128X:$src1),
               (scalar_to_vector
                (X86selects VK1WM:$mask,
                            (Op (_.EltVT
                                 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                (_.ScalarLdFrag addr:$src2)), (_.EltVT ZeroFP)))),
              (!cast<I>("V"#OpcPrefix#Zrm_Intkz) VK1WM:$mask, _.VT:$src1, addr:$src2)>;
  }
}

defm : AVX512_scalar_math_fp_patterns<fadd, "ADDSS", X86Movss, v4f32x_info, fp32imm0>;
defm : AVX512_scalar_math_fp_patterns<fsub, "SUBSS", X86Movss, v4f32x_info, fp32imm0>;
defm : AVX512_scalar_math_fp_patterns<fmul, "MULSS", X86Movss, v4f32x_info, fp32imm0>;
defm : AVX512_scalar_math_fp_patterns<fdiv, "DIVSS", X86Movss, v4f32x_info, fp32imm0>;

defm : AVX512_scalar_math_fp_patterns<fadd, "ADDSD", X86Movsd, v2f64x_info, fp64imm0>;
defm : AVX512_scalar_math_fp_patterns<fsub, "SUBSD", X86Movsd, v2f64x_info, fp64imm0>;
defm : AVX512_scalar_math_fp_patterns<fmul, "MULSD", X86Movsd, v2f64x_info, fp64imm0>;
defm : AVX512_scalar_math_fp_patterns<fdiv, "DIVSD", X86Movsd, v2f64x_info, fp64imm0>;
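// The masked variants above are what source-level masked scalar intrinsics
// such as _mm_mask_add_ss(src, k, a, b) select through: the scalar op, the
// X86selects on the mask, and the movss-style insert all fold into a single
// masked instruction, e.g. vaddss %xmm2, %xmm1, %xmm0 {%k1}.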
multiclass AVX512_scalar_unary_math_patterns<SDNode OpNode, string OpcPrefix,
                                             SDNode Move, X86VectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    def : Pat<(_.VT (Move _.VT:$dst,
                     (scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))),
              (!cast<Instruction>("V"#OpcPrefix#Zr_Int) _.VT:$dst, _.VT:$src)>;
  }
}

defm : AVX512_scalar_unary_math_patterns<fsqrt, "SQRTSS", X86Movss, v4f32x_info>;
defm : AVX512_scalar_unary_math_patterns<fsqrt, "SQRTSD", X86Movsd, v2f64x_info>;

//===----------------------------------------------------------------------===//
// AES instructions
//===----------------------------------------------------------------------===//

multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> {
  let Predicates = [HasVLX, HasVAES] in {
    defm Z128 : AESI_binop_rm_int<Op, OpStr,
                                  !cast<Intrinsic>(IntPrefix),
                                  loadv2i64, 0, VR128X, i128mem>,
                  EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V128, VEX_WIG;
    defm Z256 : AESI_binop_rm_int<Op, OpStr,
                                  !cast<Intrinsic>(IntPrefix##"_256"),
                                  loadv4i64, 0, VR256X, i256mem>,
                  EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512, HasVAES] in
  defm Z : AESI_binop_rm_int<Op, OpStr,
                             !cast<Intrinsic>(IntPrefix##"_512"),
                             loadv8i64, 0, VR512, i512mem>,
             EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_WIG;
}

defm VAESENC : avx512_vaes<0xDC, "vaesenc", "int_x86_aesni_aesenc">;
defm VAESENCLAST : avx512_vaes<0xDD, "vaesenclast", "int_x86_aesni_aesenclast">;
defm VAESDEC : avx512_vaes<0xDE, "vaesdec", "int_x86_aesni_aesdec">;
defm VAESDECLAST : avx512_vaes<0xDF, "vaesdeclast", "int_x86_aesni_aesdeclast">;

//===----------------------------------------------------------------------===//
// PCLMUL instructions - Carry less multiplication
//===----------------------------------------------------------------------===//

let Predicates = [HasAVX512, HasVPCLMULQDQ] in
defm VPCLMULQDQZ : vpclmulqdq<VR512, i512mem, loadv8i64, int_x86_pclmulqdq_512>,
                              EVEX_4V, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_WIG;

let Predicates = [HasVLX, HasVPCLMULQDQ] in {
defm VPCLMULQDQZ128 : vpclmulqdq<VR128X, i128mem, loadv2i64, int_x86_pclmulqdq>,
                                 EVEX_4V, EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_WIG;

defm VPCLMULQDQZ256: vpclmulqdq<VR256X, i256mem, loadv4i64,
                                int_x86_pclmulqdq_256>, EVEX_4V, EVEX_V256,
                                EVEX_CD8<64, CD8VF>, VEX_WIG;
}

// Aliases
defm : vpclmulqdq_aliases<"VPCLMULQDQZ", VR512, i512mem>;
defm : vpclmulqdq_aliases<"VPCLMULQDQZ128", VR128X, i128mem>;
defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>;

//===----------------------------------------------------------------------===//
// VBMI2
//===----------------------------------------------------------------------===//

multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
  let Constraints = "$src1 = $dst",
      ExeDomain = VTI.ExeDomain in {
    defm r: AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
                (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
                "$src3, $src2", "$src2, $src3",
                (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>,
                AVX512FMA3Base, Sched<[sched]>;
    defm m: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
                "$src3, $src2", "$src2, $src3",
                (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
                        (VTI.VT (VTI.LdFrag addr:$src3))))>,
                AVX512FMA3Base,
                Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
                               X86FoldableSchedWrite sched, X86VectorVTInfo VTI>
         : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched, VTI> {
  let Constraints = "$src1 = $dst",
      ExeDomain = VTI.ExeDomain in
  defm mb: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
              (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr,
              "${src3}"##VTI.BroadcastStr##", $src2",
              "$src2, ${src3}"##VTI.BroadcastStr,
              (OpNode VTI.RC:$src1, VTI.RC:$src2,
               (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
              AVX512FMA3Base, EVEX_B,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode,
                                     X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasVBMI2] in
  defm Z : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
           EVEX_V512;
  let Predicates = [HasVBMI2, HasVLX] in {
    defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
                EVEX_V256;
    defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
                EVEX_V128;
  }
}

multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode,
                                      X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasVBMI2] in
  defm Z : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
           EVEX_V512;
  let Predicates = [HasVBMI2, HasVLX] in {
    defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
                EVEX_V256;
    defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
                EVEX_V128;
  }
}
multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix,
                           SDNode OpNode, X86SchedWriteWidths sched> {
  defm W : VBMI2_shift_var_rm_common<wOp, Prefix##"w", OpNode, sched,
             avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>;
  defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix##"d", OpNode, sched,
             avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
  defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix##"q", OpNode, sched,
             avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
}

multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix,
                           SDNode OpNode, X86SchedWriteWidths sched> {
  defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix##"w", sched,
             avx512vl_i16_info, avx512vl_i16_info, HasVBMI2>,
             VEX_W, EVEX_CD8<16, CD8VF>;
  defm D : avx512_common_3Op_imm8<Prefix##"d", avx512vl_i32_info, dqOp,
             OpNode, sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
  defm Q : avx512_common_3Op_imm8<Prefix##"q", avx512vl_i64_info, dqOp, OpNode,
             sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
}

// Concat & Shift
defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SchedWriteVecIMul>;
defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SchedWriteVecIMul>;
defm VPSHLD  : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SchedWriteVecIMul>;
defm VPSHRD  : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SchedWriteVecIMul>;
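// "Concat & Shift": per element, vpshld concatenates src1:src2 into a
// double-width value, shifts left by the (possibly variable) amount, and
// keeps the high half, so e.g. a 64-bit lane roughly computes
// (src1 << amt) | (src2 >> (64 - amt)), i.e. a funnel shift. vpshrd is the
// right-shift counterpart that keeps the low half.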

// Compress
defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", WriteVarShuffle256,
                                         avx512vl_i8_info, HasVBMI2>, EVEX,
                                         NotMemoryFoldable;
defm VPCOMPRESSW : compress_by_elt_width<0x63, "vpcompressw", WriteVarShuffle256,
                                         avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W,
                                         NotMemoryFoldable;
// Expand
defm VPEXPANDB : expand_by_elt_width<0x62, "vpexpandb", WriteVarShuffle256,
                                     avx512vl_i8_info, HasVBMI2>, EVEX;
defm VPEXPANDW : expand_by_elt_width<0x62, "vpexpandw", WriteVarShuffle256,
                                     avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W;

//===----------------------------------------------------------------------===//
// VNNI
//===----------------------------------------------------------------------===//

let Constraints = "$src1 = $dst" in
multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
                    X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
                    bit IsCommutable> {
  let ExeDomain = VTI.ExeDomain in {
    defm r : AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
                                  (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
                                  "$src3, $src2", "$src2, $src3",
                                  (VTI.VT (OpNode VTI.RC:$src1,
                                           VTI.RC:$src2, VTI.RC:$src3)),
                                  IsCommutable, IsCommutable>,
                                  EVEX_4V, T8PD, Sched<[sched]>;
    defm m : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                                  (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
                                  "$src3, $src2", "$src2, $src3",
                                  (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
                                           (VTI.VT (VTI.LdFrag addr:$src3))))>,
                                  EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD,
                                  Sched<[sched.Folded, sched.ReadAfterFold]>;
    defm mb : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                                   (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
                                   OpStr, "${src3}"##VTI.BroadcastStr##", $src2",
                                   "$src2, ${src3}"##VTI.BroadcastStr,
                                   (OpNode VTI.RC:$src1, VTI.RC:$src2,
                                    (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
                                   EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B,
                                   T8PD, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode,
                       X86SchedWriteWidths sched, bit IsCommutable> {
  let Predicates = [HasVNNI] in
    defm Z : VNNI_rmb<Op, OpStr, OpNode, sched.ZMM, v16i32_info,
                      IsCommutable>, EVEX_V512;
  let Predicates = [HasVNNI, HasVLX] in {
    defm Z256 : VNNI_rmb<Op, OpStr, OpNode, sched.YMM, v8i32x_info,
                         IsCommutable>, EVEX_V256;
    defm Z128 : VNNI_rmb<Op, OpStr, OpNode, sched.XMM, v4i32x_info,
                         IsCommutable>, EVEX_V128;
  }
}

// FIXME: Is there a better scheduler class for VPDP?
defm VPDPBUSD  : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SchedWriteVecIMul, 0>;
defm VPDPBUSDS : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SchedWriteVecIMul, 0>;
defm VPDPWSSD  : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SchedWriteVecIMul, 1>;
defm VPDPWSSDS : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SchedWriteVecIMul, 1>;

def X86vpmaddwd_su : PatFrag<(ops node:$lhs, node:$rhs),
                             (X86vpmaddwd node:$lhs, node:$rhs), [{
  return N->hasOneUse();
}]>;
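// Note: the hasOneUse() predicate above is what makes the
// add(x, vpmaddwd(a, b)) -> vpdpwssd(x, a, b) folds below profitable. Sketch
// of the shape being matched:
//   %m = vpmaddwd a, b   ; single use
//   %r = add x, %m       ; selected as vpdpwssd x, a, b
// If %m had another user, the vpmaddwd would have to be emitted anyway and
// the fold would only add an instruction.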
// Patterns to match VPDPWSSD from existing instructions/intrinsics.
let Predicates = [HasVNNI] in {
  def : Pat<(v16i32 (add VR512:$src1,
                         (X86vpmaddwd_su VR512:$src2, VR512:$src3))),
            (VPDPWSSDZr VR512:$src1, VR512:$src2, VR512:$src3)>;
  def : Pat<(v16i32 (add VR512:$src1,
                         (X86vpmaddwd_su VR512:$src2, (load addr:$src3)))),
            (VPDPWSSDZm VR512:$src1, VR512:$src2, addr:$src3)>;
}
let Predicates = [HasVNNI, HasVLX] in {
  def : Pat<(v8i32 (add VR256X:$src1,
                        (X86vpmaddwd_su VR256X:$src2, VR256X:$src3))),
            (VPDPWSSDZ256r VR256X:$src1, VR256X:$src2, VR256X:$src3)>;
  def : Pat<(v8i32 (add VR256X:$src1,
                        (X86vpmaddwd_su VR256X:$src2, (load addr:$src3)))),
            (VPDPWSSDZ256m VR256X:$src1, VR256X:$src2, addr:$src3)>;
  def : Pat<(v4i32 (add VR128X:$src1,
                        (X86vpmaddwd_su VR128X:$src2, VR128X:$src3))),
            (VPDPWSSDZ128r VR128X:$src1, VR128X:$src2, VR128X:$src3)>;
  def : Pat<(v4i32 (add VR128X:$src1,
                        (X86vpmaddwd_su VR128X:$src2, (load addr:$src3)))),
            (VPDPWSSDZ128m VR128X:$src1, VR128X:$src2, addr:$src3)>;
}

//===----------------------------------------------------------------------===//
// Bit Algorithms
//===----------------------------------------------------------------------===//

// FIXME: Is there a better scheduler class for VPOPCNTB/VPOPCNTW?
defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, SchedWriteVecALU,
                                   avx512vl_i8_info, HasBITALG>;
defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SchedWriteVecALU,
                                   avx512vl_i16_info, HasBITALG>, VEX_W;

defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>;
defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>;

def X86Vpshufbitqmb_su : PatFrag<(ops node:$src1, node:$src2),
                                 (X86Vpshufbitqmb node:$src1, node:$src2), [{
  return N->hasOneUse();
}]>;

multiclass VPSHUFBITQMB_rm<X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
  defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst),
                                (ins VTI.RC:$src1, VTI.RC:$src2),
                                "vpshufbitqmb",
                                "$src2, $src1", "$src1, $src2",
                                (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
                                 (VTI.VT VTI.RC:$src2)),
                                (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
                                 (VTI.VT VTI.RC:$src2))>, EVEX_4V, T8PD,
                                Sched<[sched]>;
  defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst),
                                (ins VTI.RC:$src1, VTI.MemOp:$src2),
                                "vpshufbitqmb",
                                "$src2, $src1", "$src1, $src2",
                                (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
                                 (VTI.VT (VTI.LdFrag addr:$src2))),
                                (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
                                 (VTI.VT (VTI.LdFrag addr:$src2)))>,
                                EVEX_4V, EVEX_CD8<8, CD8VF>, T8PD,
                                Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass VPSHUFBITQMB_common<X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasBITALG] in
    defm Z : VPSHUFBITQMB_rm<sched.ZMM, VTI.info512>, EVEX_V512;
  let Predicates = [HasBITALG, HasVLX] in {
    defm Z256 : VPSHUFBITQMB_rm<sched.YMM, VTI.info256>, EVEX_V256;
    defm Z128 : VPSHUFBITQMB_rm<sched.XMM, VTI.info128>, EVEX_V128;
  }
}

// FIXME: Is there a better scheduler class for VPSHUFBITQMB?
defm VPSHUFBITQMB : VPSHUFBITQMB_common<SchedWriteVecIMul, avx512vl_i8_info>;
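// Note (illustrative): vpshufbitqmb gathers bits from $src1 into a mask
// register, one bit per byte lane of $src2; roughly, for each byte j:
//   k[j] = src1.qword[j/8].bit[src2.byte[j] & 63]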

//===----------------------------------------------------------------------===//
// GFNI
//===----------------------------------------------------------------------===//

multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
                                   X86SchedWriteWidths sched> {
  let Predicates = [HasGFNI, HasAVX512, HasBWI] in
    defm Z : avx512_binop_rm<Op, OpStr, OpNode, v64i8_info, sched.ZMM, 1>,
             EVEX_V512;
  let Predicates = [HasGFNI, HasVLX, HasBWI] in {
    defm Z256 : avx512_binop_rm<Op, OpStr, OpNode, v32i8x_info, sched.YMM, 1>,
                EVEX_V256;
    defm Z128 : avx512_binop_rm<Op, OpStr, OpNode, v16i8x_info, sched.XMM, 1>,
                EVEX_V128;
  }
}

defm VGF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb,
                                          SchedWriteVecALU>,
                  EVEX_CD8<8, CD8VF>, T8PD;

multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
                                      X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
                                      X86VectorVTInfo BcstVTI>
           : avx512_3Op_rm_imm8<Op, OpStr, OpNode, sched, VTI, VTI> {
  let ExeDomain = VTI.ExeDomain in
  defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                (ins VTI.RC:$src1, VTI.ScalarMemOp:$src2, u8imm:$src3),
                OpStr, "$src3, ${src2}"##BcstVTI.BroadcastStr##", $src1",
                "$src1, ${src2}"##BcstVTI.BroadcastStr##", $src3",
                (OpNode (VTI.VT VTI.RC:$src1),
                 (bitconvert (BcstVTI.VT (X86VBroadcastld64 addr:$src2))),
                 (i8 timm:$src3))>, EVEX_B,
                Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
                                     X86SchedWriteWidths sched> {
  let Predicates = [HasGFNI, HasAVX512, HasBWI] in
    defm Z : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.ZMM,
                                        v64i8_info, v8i64_info>, EVEX_V512;
  let Predicates = [HasGFNI, HasVLX, HasBWI] in {
    defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.YMM,
                                           v32i8x_info, v4i64x_info>, EVEX_V256;
    defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.XMM,
                                           v16i8x_info, v2i64x_info>, EVEX_V128;
  }
}

defm VGF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb",
                                                   X86GF2P8affineinvqb, SchedWriteVecIMul>,
                         EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
defm VGF2P8AFFINEQB : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb",
                                                X86GF2P8affineqb, SchedWriteVecIMul>,
                      EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
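// Note (illustrative): vgf2p8affineqb computes a per-byte affine transform in
// GF(2): each destination byte is the product of an 8x8 bit matrix, taken
// from the matching quadword of the second (broadcastable) source, with the
// first source's byte, XORed with imm8. vgf2p8affineinvqb additionally maps
// the byte through its GF(2^8) multiplicative inverse first, which is why the
// rmbi form above broadcasts a single 64-bit matrix across all lanes.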

//===----------------------------------------------------------------------===//
// AVX5124FMAPS
//===----------------------------------------------------------------------===//

let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedSingle,
    Constraints = "$src1 = $dst", Uses = [MXCSR], mayRaiseFPException = 1 in {
defm V4FMADDPSrm : AVX512_maskable_3src_in_asm<0x9A, MRMSrcMem, v16f32_info,
                    (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                    "v4fmaddps", "$src3, $src2", "$src2, $src3",
                    []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                    Sched<[SchedWriteFMA.ZMM.Folded]>;

defm V4FNMADDPSrm : AVX512_maskable_3src_in_asm<0xAA, MRMSrcMem, v16f32_info,
                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                     "v4fnmaddps", "$src3, $src2", "$src2, $src3",
                     []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                     Sched<[SchedWriteFMA.ZMM.Folded]>;

defm V4FMADDSSrm : AVX512_maskable_3src_in_asm<0x9B, MRMSrcMem, f32x_info,
                    (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
                    "v4fmaddss", "$src3, $src2", "$src2, $src3",
                    []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
                    Sched<[SchedWriteFMA.Scl.Folded]>;

defm V4FNMADDSSrm : AVX512_maskable_3src_in_asm<0xAB, MRMSrcMem, f32x_info,
                     (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
                     "v4fnmaddss", "$src3, $src2", "$src2, $src3",
                     []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
                     Sched<[SchedWriteFMA.Scl.Folded]>;
}

//===----------------------------------------------------------------------===//
// AVX5124VNNIW
//===----------------------------------------------------------------------===//

let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedInt,
    Constraints = "$src1 = $dst" in {
defm VP4DPWSSDrm : AVX512_maskable_3src_in_asm<0x52, MRMSrcMem, v16i32_info,
                    (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                    "vp4dpwssd", "$src3, $src2", "$src2, $src3",
                    []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                    Sched<[SchedWriteFMA.ZMM.Folded]>;

defm VP4DPWSSDSrm : AVX512_maskable_3src_in_asm<0x53, MRMSrcMem, v16i32_info,
                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                     "vp4dpwssds", "$src3, $src2", "$src2, $src3",
                     []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                     Sched<[SchedWriteFMA.ZMM.Folded]>;
}

let hasSideEffects = 0 in {
  let mayStore = 1, SchedRW = [WriteFStoreX] in
  def MASKPAIR16STORE : PseudoI<(outs), (ins anymem:$dst, VK16PAIR:$src), []>;
  let mayLoad = 1, SchedRW = [WriteFLoadX] in
  def MASKPAIR16LOAD : PseudoI<(outs VK16PAIR:$dst), (ins anymem:$src), []>;
}
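// Note: the 4FMAPS/4VNNIW defs above intentionally carry empty patterns; the
// instructions consume a block of four sequential ZMM registers (written
// zmm4+3 in Intel's docs) plus a 128-bit memory operand, so they are only
// created from their intrinsics during selection. The MASKPAIR16* pseudos
// provide spill/reload for the VK16PAIR values produced by vp2intersect
// below.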

//===----------------------------------------------------------------------===//
// VP2INTERSECT
//===----------------------------------------------------------------------===//

multiclass avx512_vp2intersect_modes<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  def rr : I<0x68, MRMSrcReg,
             (outs _.KRPC:$dst),
             (ins _.RC:$src1, _.RC:$src2),
             !strconcat("vp2intersect", _.Suffix,
                        "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.KRPC:$dst, (X86vp2intersect
                                 _.RC:$src1, (_.VT _.RC:$src2)))]>,
             EVEX_4V, T8XD, Sched<[sched]>;

  def rm : I<0x68, MRMSrcMem,
             (outs _.KRPC:$dst),
             (ins _.RC:$src1, _.MemOp:$src2),
             !strconcat("vp2intersect", _.Suffix,
                        "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.KRPC:$dst, (X86vp2intersect
                                 _.RC:$src1, (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>,
             EVEX_4V, T8XD, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[sched.Folded, sched.ReadAfterFold]>;

  def rmb : I<0x68, MRMSrcMem,
              (outs _.KRPC:$dst),
              (ins _.RC:$src1, _.ScalarMemOp:$src2),
              !strconcat("vp2intersect", _.Suffix, "\t{${src2}", _.BroadcastStr,
                         ", $src1, $dst|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
              [(set _.KRPC:$dst, (X86vp2intersect
                                  _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))]>,
              EVEX_4V, T8XD, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_vp2intersect<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512, HasVP2INTERSECT] in
    defm Z : avx512_vp2intersect_modes<sched.ZMM, _.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVP2INTERSECT, HasVLX] in {
    defm Z256 : avx512_vp2intersect_modes<sched.YMM, _.info256>, EVEX_V256;
    defm Z128 : avx512_vp2intersect_modes<sched.XMM, _.info128>, EVEX_V128;
  }
}

defm VP2INTERSECTD : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i32_info>;
defm VP2INTERSECTQ : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i64_info>, VEX_W;

multiclass avx512_binop_all2<bits<8> opc, string OpcodeStr,
                             X86SchedWriteWidths sched,
                             AVX512VLVectorVTInfo _SrcVTInfo,
                             AVX512VLVectorVTInfo _DstVTInfo,
                             SDNode OpNode, Predicate prd,
                             bit IsCommutable = 0> {
  let Predicates = [prd] in
    defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
                                   _SrcVTInfo.info512, _DstVTInfo.info512,
                                   _SrcVTInfo.info512, IsCommutable>,
                                   EVEX_V512, EVEX_CD8<32, CD8VF>;
  let Predicates = [HasVLX, prd] in {
    defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
                                      _SrcVTInfo.info256, _DstVTInfo.info256,
                                      _SrcVTInfo.info256, IsCommutable>,
                                      EVEX_V256, EVEX_CD8<32, CD8VF>;
    defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
                                      _SrcVTInfo.info128, _DstVTInfo.info128,
                                      _SrcVTInfo.info128, IsCommutable>,
                                      EVEX_V128, EVEX_CD8<32, CD8VF>;
  }
}

defm VCVTNE2PS2BF16 : avx512_binop_all2<0x72, "vcvtne2ps2bf16",
                                        SchedWriteCvtPD2PS, // FIXME: Should be SchedWriteCvtPS2BF.
                                        avx512vl_f32_info, avx512vl_i16_info,
                                        X86cvtne2ps2bf16, HasBF16, 0>, T8XD;

// Truncate Float to BFloat16
multiclass avx512_cvtps2bf16<bits<8> opc, string OpcodeStr,
                             X86SchedWriteWidths sched> {
  let Predicates = [HasBF16], Uses = []<Register>, mayRaiseFPException = 0 in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i16x_info, v16f32_info,
                            X86cvtneps2bf16, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasBF16, HasVLX] in {
    let Uses = []<Register>, mayRaiseFPException = 0 in {
      defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v4f32x_info,
                                 null_frag, sched.XMM, "{1to4}", "{x}", f128mem,
                                 VK4WM>, EVEX_V128;
      defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v8f32x_info,
                                 X86cvtneps2bf16,
                                 sched.YMM, "{1to8}", "{y}">, EVEX_V256;
    }

    def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
                     VR128X:$src), 0>;
    def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst,
                     f128mem:$src), 0, "intel">;
    def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
                     VR256X:$src), 0>;
    def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst,
                     f256mem:$src), 0, "intel">;
  }
}

defm VCVTNEPS2BF16 : avx512_cvtps2bf16<0x72, "vcvtneps2bf16",
                                       SchedWriteCvtPD2PS>, T8XS,
                                       EVEX_CD8<32, CD8VF>;
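// Note: the explicit "x"/"y" InstAliases above exist because vcvtneps2bf16
// always writes an XMM register, so the memory forms do not identify the
// source width on their own. Illustrative Intel-syntax spellings:
//   vcvtneps2bf16x xmm0, xmmword ptr [rax]  ; 4 x f32 -> 4 x bf16 in low qword
//   vcvtneps2bf16y xmm0, ymmword ptr [rax]  ; 8 x f32 -> 8 x bf16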
let Predicates = [HasBF16, HasVLX] in {
  // Special patterns to allow use of X86mcvtneps2bf16 for masking. Instruction
  // patterns have been disabled with null_frag.
  def : Pat<(v8i16 (X86cvtneps2bf16 (v4f32 VR128X:$src))),
            (VCVTNEPS2BF16Z128rr VR128X:$src)>;
  def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), (v8i16 VR128X:$src0),
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src)>;
  def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), v8i16x_info.ImmAllZerosV,
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rrkz VK4WM:$mask, VR128X:$src)>;

  def : Pat<(v8i16 (X86cvtneps2bf16 (loadv4f32 addr:$src))),
            (VCVTNEPS2BF16Z128rm addr:$src)>;
  def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), (v8i16 VR128X:$src0),
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), v8i16x_info.ImmAllZerosV,
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rmkz VK4WM:$mask, addr:$src)>;

  def : Pat<(v8i16 (X86cvtneps2bf16 (v4f32
                                     (X86VBroadcastld32 addr:$src)))),
            (VCVTNEPS2BF16Z128rmb addr:$src)>;
  def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
                              (v8i16 VR128X:$src0), VK4WM:$mask),
            (VCVTNEPS2BF16Z128rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
                              v8i16x_info.ImmAllZerosV, VK4WM:$mask),
            (VCVTNEPS2BF16Z128rmbkz VK4WM:$mask, addr:$src)>;
}

let Constraints = "$src1 = $dst" in {
multiclass avx512_dpbf16ps_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86FoldableSchedWrite sched,
                              X86VectorVTInfo _, X86VectorVTInfo src_v> {
  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                               (ins _.RC:$src2, _.RC:$src3),
                               OpcodeStr, "$src3, $src2", "$src2, $src3",
                               (_.VT (OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3))>,
                               EVEX_4V, Sched<[sched]>;

  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                               (ins _.RC:$src2, _.MemOp:$src3),
                               OpcodeStr, "$src3, $src2", "$src2, $src3",
                               (_.VT (OpNode _.RC:$src1, _.RC:$src2,
                                      (src_v.VT (bitconvert
                                                 (src_v.LdFrag addr:$src3)))))>, EVEX_4V,
                               Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                                (ins _.RC:$src2, _.ScalarMemOp:$src3),
                                OpcodeStr,
                                !strconcat("${src3}", _.BroadcastStr, ", $src2"),
                                !strconcat("$src2, ${src3}", _.BroadcastStr),
                                (_.VT (OpNode _.RC:$src1, _.RC:$src2,
                                       (src_v.VT (src_v.BroadcastLdFrag addr:$src3))))>,
                                EVEX_B, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
} // Constraints = "$src1 = $dst"

multiclass avx512_dpbf16ps_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _,
                                 AVX512VLVectorVTInfo src_v, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512,
                                src_v.info512>, EVEX_V512;
  }
  let Predicates = [HasVLX, prd] in {
    defm Z256 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256,
                                   src_v.info256>, EVEX_V256;
    defm Z128 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128,
                                   src_v.info128>, EVEX_V128;
  }
}

defm VDPBF16PS : avx512_dpbf16ps_sizes<0x52, "vdpbf16ps", X86dpbf16ps, SchedWriteFMA,
                                       avx512vl_f32_info, avx512vl_i32_info,
                                       HasBF16>, T8XS, EVEX_CD8<32, CD8VF>;
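// Note (illustrative): vdpbf16ps accumulates dot products of bf16 pairs into
// f32 lanes; per 32-bit lane i the operation is roughly
//   dst.f32[i] += src2.bf16[2*i+1] * src3.bf16[2*i+1]
//              +  src2.bf16[2*i]   * src3.bf16[2*i]
// which is why the memory forms above load through the i32 source info
// (avx512vl_i32_info) while the destination uses the f32 info.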