//===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 AVX512 instruction set, defining the
// instructions, and properties of the instructions which are needed for code
// generation, machine code emission, and analysis.
//
//===----------------------------------------------------------------------===//

// Group template arguments that can be derived from the vector type (EltNum x
// EltVT). These are things like the register class for the writemask, etc.
// The idea is to pass one of these as the template argument rather than the
// individual arguments.
// The template is also used for scalar types, in which case NumElts is 1.
class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
                      string suffix = ""> {
  RegisterClass RC = rc;
  ValueType EltVT = eltvt;
  int NumElts = numelts;

  // Corresponding mask register class.
  RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);

  // Corresponding mask register pair class.
  RegisterOperand KRPC = !if (!gt(NumElts, 16), ?,
                              !cast<RegisterOperand>("VK" # NumElts # "Pair"));

  // Corresponding write-mask register class.
  RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");

  // The mask VT.
  ValueType KVT = !cast<ValueType>("v" # NumElts # "i1");

  // Suffix used in the instruction mnemonic.
  string Suffix = suffix;

  // VTName is a string name for the vector VT. For vector types it will be
  // v # NumElts # EltVT, so for a vector of 8 elements of i32 it will be
  // v8i32. It is a little more involved for scalar types, where NumElts = 1;
  // in that case we build v4f32 or v2f64.
  string VTName = "v" # !if (!eq (NumElts, 1),
                        !if (!eq (EltVT.Size, 32), 4,
                        !if (!eq (EltVT.Size, 64), 2, NumElts)), NumElts) # EltVT;

  // The vector VT.
  ValueType VT = !cast<ValueType>(VTName);

  string EltTypeName = !cast<string>(EltVT);
  // Size of the element type in bits, e.g. 32 for v16i32.
  string EltSizeName = !subst("i", "", !subst("f", "", EltTypeName));
  int EltSize = EltVT.Size;

  // "i" for integer types and "f" for floating-point types.
  string TypeVariantName = !subst(EltSizeName, "", EltTypeName);

  // Size of RC in bits, e.g. 512 for VR512.
  int Size = VT.Size;

  // The corresponding memory operand, e.g. i512mem for VR512.
  X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
  X86MemOperand ScalarMemOp = !cast<X86MemOperand>(EltVT # "mem");
  // FP scalar memory operand for intrinsics - ssmem/sdmem.
  Operand IntScalarMemOp = !if (!eq (EltTypeName, "f32"), !cast<Operand>("ssmem"),
                           !if (!eq (EltTypeName, "f64"), !cast<Operand>("sdmem"), ?));

  // Load patterns
  PatFrag LdFrag = !cast<PatFrag>("load" # VTName);

  PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" # VTName);

  PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);
  PatFrag BroadcastLdFrag = !cast<PatFrag>("X86VBroadcastld" # EltSizeName);

  ComplexPattern ScalarIntMemCPat = !if (!eq (EltTypeName, "f32"),
                                         !cast<ComplexPattern>("sse_load_f32"),
                                    !if (!eq (EltTypeName, "f64"),
                                         !cast<ComplexPattern>("sse_load_f64"),
                                         ?));

  // The string to specify embedded broadcast in assembly.
  string BroadcastStr = "{1to" # NumElts # "}";

  // 8-bit compressed displacement tuple/subvector format. This is only
  // defined for NumElts <= 8.
  CD8VForm CD8TupleForm = !if (!eq (!srl(NumElts, 4), 0),
                               !cast<CD8VForm>("CD8VT" # NumElts), ?);

  SubRegIndex SubRegIdx = !if (!eq (Size, 128), sub_xmm,
                          !if (!eq (Size, 256), sub_ymm, ?));

  Domain ExeDomain = !if (!eq (EltTypeName, "f32"), SSEPackedSingle,
                     !if (!eq (EltTypeName, "f64"), SSEPackedDouble,
                          SSEPackedInt));

  RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X, FR64X);

  dag ImmAllZerosV = (VT immAllZerosV);

  string ZSuffix = !if (!eq (Size, 128), "Z128",
                   !if (!eq (Size, 256), "Z256", "Z"));
}

def v64i8_info  : X86VectorVTInfo<64,  i8, VR512, "b">;
def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">;
def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
def v8i64_info  : X86VectorVTInfo<8,  i64, VR512, "q">;
def v16f32_info : X86VectorVTInfo<16, f32, VR512, "ps">;
def v8f64_info  : X86VectorVTInfo<8,  f64, VR512, "pd">;

// "x" in v32i8x_info means RC = VR256X
def v32i8x_info  : X86VectorVTInfo<32,  i8, VR256X, "b">;
def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
def v8i32x_info  : X86VectorVTInfo<8,  i32, VR256X, "d">;
def v4i64x_info  : X86VectorVTInfo<4,  i64, VR256X, "q">;
def v8f32x_info  : X86VectorVTInfo<8,  f32, VR256X, "ps">;
def v4f64x_info  : X86VectorVTInfo<4,  f64, VR256X, "pd">;

def v16i8x_info  : X86VectorVTInfo<16,  i8, VR128X, "b">;
def v8i16x_info  : X86VectorVTInfo<8,  i16, VR128X, "w">;
def v4i32x_info  : X86VectorVTInfo<4,  i32, VR128X, "d">;
def v2i64x_info  : X86VectorVTInfo<2,  i64, VR128X, "q">;
def v4f32x_info  : X86VectorVTInfo<4,  f32, VR128X, "ps">;
def v2f64x_info  : X86VectorVTInfo<2,  f64, VR128X, "pd">;
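// As an illustration of the derived fields (computed by the class above, not
// additional definitions): v16i32_info yields RC = VR512, KRC = VK16,
// KRCWM = VK16WM, KVT = v16i1, VTName = "v16i32", MemOp = i512mem,
// ScalarMemOp = i32mem, LdFrag = loadv16i32, BroadcastLdFrag =
// X86VBroadcastld32, BroadcastStr = "{1to16}", ExeDomain = SSEPackedInt and
// ZSuffix = "Z".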
// We map scalar types to the smallest (128-bit) vector type
// with the appropriate element type. This allows us to use the same masking
// logic.
def i32x_info : X86VectorVTInfo<1, i32, GR32, "si">;
def i64x_info : X86VectorVTInfo<1, i64, GR64, "sq">;
def f32x_info : X86VectorVTInfo<1, f32, VR128X, "ss">;
def f64x_info : X86VectorVTInfo<1, f64, VR128X, "sd">;

class AVX512VLVectorVTInfo<X86VectorVTInfo i512, X86VectorVTInfo i256,
                           X86VectorVTInfo i128> {
  X86VectorVTInfo info512 = i512;
  X86VectorVTInfo info256 = i256;
  X86VectorVTInfo info128 = i128;
}

def avx512vl_i8_info  : AVX512VLVectorVTInfo<v64i8_info, v32i8x_info,
                                             v16i8x_info>;
def avx512vl_i16_info : AVX512VLVectorVTInfo<v32i16_info, v16i16x_info,
                                             v8i16x_info>;
def avx512vl_i32_info : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info,
                                             v4i32x_info>;
def avx512vl_i64_info : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info,
                                             v2i64x_info>;
def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info,
                                             v4f32x_info>;
def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info,
                                             v2f64x_info>;

class X86KVectorVTInfo<RegisterClass _krc, RegisterClass _krcwm,
                       ValueType _vt> {
  RegisterClass KRC = _krc;
  RegisterClass KRCWM = _krcwm;
  ValueType KVT = _vt;
}

def v1i1_info  : X86KVectorVTInfo<VK1,  VK1WM,  v1i1>;
def v2i1_info  : X86KVectorVTInfo<VK2,  VK2WM,  v2i1>;
def v4i1_info  : X86KVectorVTInfo<VK4,  VK4WM,  v4i1>;
def v8i1_info  : X86KVectorVTInfo<VK8,  VK8WM,  v8i1>;
def v16i1_info : X86KVectorVTInfo<VK16, VK16WM, v16i1>;
def v32i1_info : X86KVectorVTInfo<VK32, VK32WM, v32i1>;
def v64i1_info : X86KVectorVTInfo<VK64, VK64WM, v64i1>;

// This multiclass generates the masking variants from the non-masking
// variant. It only provides the assembly pieces for the masking variants.
// It assumes custom ISel patterns for masking, which can be provided as
// template arguments.
multiclass AVX512_maskable_custom<bits<8> O, Format F,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern,
                                  list<dag> MaskingPattern,
                                  list<dag> ZeroMaskingPattern,
                                  string MaskingConstraint = "",
                                  bit IsCommutable = 0,
                                  bit IsKCommutable = 0,
                                  bit IsKZCommutable = IsCommutable> {
  let isCommutable = IsCommutable in
  def NAME: AVX512<O, F, Outs, Ins,
                   OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
                                "$dst, "#IntelSrcAsm#"}",
                   Pattern>;

  // Prefer over VMOV*rrk Pat<>
  let isCommutable = IsKCommutable in
  def NAME#k: AVX512<O, F, Outs, MaskingIns,
                     OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
                                  "$dst {${mask}}, "#IntelSrcAsm#"}",
                     MaskingPattern>,
              EVEX_K {
    // In case of the 3src subclass this is overridden with a let.
    string Constraints = MaskingConstraint;
  }

  // Zero-masking does not add any restrictions to the operand-commuting
  // transformation, so it is OK to use IsCommutable instead of IsKCommutable.
  let isCommutable = IsKZCommutable in // Prefer over VMOV*rrkz Pat<>
  def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
                      OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
                                   "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
                      ZeroMaskingPattern>,
               EVEX_KZ;
}

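// A sketch of what a hypothetical instantiation FOO (OpcodeStr = "foo",
// AttSrcAsm = IntelSrcAsm = "$src") expands to, derived from the string
// concatenation above:
//   FOO:   "foo\t{$src, $dst|$dst, $src}"
//   FOOk:  "foo\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"
//   FOOkz: "foo\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"
// which assemble as e.g. "foo %xmm1, %xmm0", "foo %xmm1, %xmm0 {%k1}" and
// "foo %xmm1, %xmm0 {%k1} {z}" in AT&T syntax.
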
// Common base class of AVX512_maskable and AVX512_maskable_3src.
multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS, dag MaskingRHS,
                                  SDNode Select = vselect,
                                  string MaskingConstraint = "",
                                  bit IsCommutable = 0,
                                  bit IsKCommutable = 0,
                                  bit IsKZCommutable = IsCommutable> :
  AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
                         AttSrcAsm, IntelSrcAsm,
                         [(set _.RC:$dst, RHS)],
                         [(set _.RC:$dst, MaskingRHS)],
                         [(set _.RC:$dst,
                               (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
                         MaskingConstraint, IsCommutable,
                         IsKCommutable, IsKZCommutable>;

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction. In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
// This version uses a separate dag for non-masking and masking.
multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
                                 dag Outs, dag Ins, string OpcodeStr,
                                 string AttSrcAsm, string IntelSrcAsm,
                                 dag RHS, dag MaskRHS,
                                 bit IsCommutable = 0, bit IsKCommutable = 0,
                                 SDNode Select = vselect> :
  AVX512_maskable_custom<O, F, Outs, Ins,
                         !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                         !con((ins _.KRCWM:$mask), Ins),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm,
                         [(set _.RC:$dst, RHS)],
                         [(set _.RC:$dst,
                               (Select _.KRCWM:$mask, MaskRHS, _.RC:$src0))],
                         [(set _.RC:$dst,
                               (Select _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))],
                         "$src0 = $dst", IsCommutable, IsKCommutable>;

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction. In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS,
                           bit IsCommutable = 0, bit IsKCommutable = 0,
                           bit IsKZCommutable = IsCommutable,
                           SDNode Select = vselect> :
  AVX512_maskable_common<O, F, _, Outs, Ins,
                         !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                         !con((ins _.KRCWM:$mask), Ins),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                         (Select _.KRCWM:$mask, RHS, _.RC:$src0),
                         Select, "$src0 = $dst", IsCommutable, IsKCommutable,
                         IsKZCommutable>;

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the scalar instruction.
multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs, dag Ins, string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS> :
  AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm,
                  RHS, 0, 0, 0, X86selects>;

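// A sketch of what AVX512_maskable adds on top of the custom variant, for a
// hypothetical unary OpNode over v16i32_info: the merge-masking record gets
//   ins     = (ins VR512:$src0, VK16WM:$mask, VR512:$src)
//   pattern = (set VR512:$dst,
//                  (vselect VK16WM:$mask, (OpNode ...), VR512:$src0))
// with the "$src0 = $dst" constraint tying the pass-through operand to the
// destination, while the zero-masking record selects against immAllZerosV
// and needs no tied operand.
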
// Similar to AVX512_maskable but in this case one of the source operands
// ($src1) is already tied to $dst so we just use that for the preserved
// vector elements. NOTE that the NonTiedIns (the ins dag) should exclude
// $src1.
multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
                                dag Outs, dag NonTiedIns, string OpcodeStr,
                                string AttSrcAsm, string IntelSrcAsm,
                                dag RHS,
                                bit IsCommutable = 0,
                                bit IsKCommutable = 0,
                                SDNode Select = vselect,
                                bit MaskOnly = 0> :
  AVX512_maskable_common<O, F, _, Outs,
                         !con((ins _.RC:$src1), NonTiedIns),
                         !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                         !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm,
                         !if(MaskOnly, (null_frag), RHS),
                         (Select _.KRCWM:$mask, RHS, _.RC:$src1),
                         Select, "", IsCommutable, IsKCommutable>;

// Similar to AVX512_maskable_3src but in this case the input VT for the tied
// operand differs from the output VT. This requires a bitconvert on
// the preserved vector going into the vselect.
// NOTE: The unmasked pattern is disabled.
multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
                                     X86VectorVTInfo InVT,
                                     dag Outs, dag NonTiedIns, string OpcodeStr,
                                     string AttSrcAsm, string IntelSrcAsm,
                                     dag RHS, bit IsCommutable = 0> :
  AVX512_maskable_common<O, F, OutVT, Outs,
                         !con((ins InVT.RC:$src1), NonTiedIns),
                         !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
                         !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm, (null_frag),
                         (vselect InVT.KRCWM:$mask, RHS,
                                  (bitconvert InVT.RC:$src1)),
                         vselect, "", IsCommutable>;

multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                                       dag Outs, dag NonTiedIns, string OpcodeStr,
                                       string AttSrcAsm, string IntelSrcAsm,
                                       dag RHS,
                                       bit IsCommutable = 0,
                                       bit IsKCommutable = 0,
                                       bit MaskOnly = 0> :
  AVX512_maskable_3src<O, F, _, Outs, NonTiedIns, OpcodeStr, AttSrcAsm,
                       IntelSrcAsm, RHS, IsCommutable, IsKCommutable,
                       X86selects, MaskOnly>;

multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs, dag Ins,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern> :
  AVX512_maskable_custom<O, F, Outs, Ins,
                         !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                         !con((ins _.KRCWM:$mask), Ins),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
                         "$src0 = $dst">;

multiclass AVX512_maskable_3src_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
                                       dag Outs, dag NonTiedIns,
                                       string OpcodeStr,
                                       string AttSrcAsm, string IntelSrcAsm,
                                       list<dag> Pattern> :
  AVX512_maskable_custom<O, F, Outs,
                         !con((ins _.RC:$src1), NonTiedIns),
                         !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                         !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
                         "">;

// Instruction with mask that puts result in mask register,
// like "compare" and "vptest".
multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
                                      dag Outs,
                                      dag Ins, dag MaskingIns,
                                      string OpcodeStr,
                                      string AttSrcAsm, string IntelSrcAsm,
                                      list<dag> Pattern,
                                      list<dag> MaskingPattern,
                                      bit IsCommutable = 0> {
  let isCommutable = IsCommutable in {
    def NAME: AVX512<O, F, Outs, Ins,
                     OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
                                  "$dst, "#IntelSrcAsm#"}",
                     Pattern>;

    def NAME#k: AVX512<O, F, Outs, MaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
                                    "$dst {${mask}}, "#IntelSrcAsm#"}",
                       MaskingPattern>, EVEX_K;
  }
}

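// Unlike the vector-destination multiclasses earlier, the cmp multiclasses
// write a k-register. As a hedged illustration: an instantiation yields FOO
// with $dst in a VK* class, plus FOOk whose mask operand guards which element
// comparisons may set result bits, e.g. "vpcmpeqd %zmm1, %zmm0, %k0 {%k1}".
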
multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
                                      dag Outs,
                                      dag Ins, dag MaskingIns,
                                      string OpcodeStr,
                                      string AttSrcAsm, string IntelSrcAsm,
                                      dag RHS, dag MaskingRHS,
                                      bit IsCommutable = 0> :
  AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
                             AttSrcAsm, IntelSrcAsm,
                             [(set _.KRC:$dst, RHS)],
                             [(set _.KRC:$dst, MaskingRHS)], IsCommutable>;

multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
                               dag Outs, dag Ins, string OpcodeStr,
                               string AttSrcAsm, string IntelSrcAsm,
                               dag RHS, dag RHS_su, bit IsCommutable = 0> :
  AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
                             !con((ins _.KRCWM:$mask), Ins),
                             OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                             (and _.KRCWM:$mask, RHS_su), IsCommutable>;


// Alias instruction that maps zero vector to pxor / xorp* for AVX-512.
// This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
// swizzled by ExecutionDomainFix to pxor.
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
                        [(set VR512:$dst, (v16i32 immAllZerosV))]>;
def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
                              [(set VR512:$dst, (v16i32 immAllOnesV))]>;
}

let Predicates = [HasAVX512] in {
def : Pat<(v64i8 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v32i16 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v8i64 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v16f32 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>;
}

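// The pseudo is type-agnostic at the register level: the extra Pat<>s above
// simply reuse AVX512_512_SET0 for every 512-bit zero, so e.g. zeroing a
// v8f64 value selects the same pseudo and, per the comment above, becomes a
// single register self-XOR after pseudo expansion.
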
// Alias instructions that allow VPTERNLOG to be used with a mask to create
// a mix of all ones and all zeros elements. This is done this way to force
// the same register to be used as input for all three sources.
let isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteVecALU] in {
def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst),
                                (ins VK16WM:$mask), "",
                                [(set VR512:$dst, (vselect (v16i1 VK16WM:$mask),
                                                           (v16i32 immAllOnesV),
                                                           (v16i32 immAllZerosV)))]>;
def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
                                (ins VK8WM:$mask), "",
                                [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
                                                           (v8i64 immAllOnesV),
                                                           (v8i64 immAllZerosV)))]>;
}

let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
                        [(set VR128X:$dst, (v4i32 immAllZerosV))]>;
def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
                        [(set VR256X:$dst, (v8i32 immAllZerosV))]>;
}

let Predicates = [HasAVX512] in {
def : Pat<(v8i16 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v16i8 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v2i64 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v4f32 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v2f64 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v32i8 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v16i16 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v4i64 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v8f32 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v4f64 immAllZerosV), (AVX512_256_SET0)>;
}

// Alias instructions that map fld0 to xorps for sse or vxorps for avx.
// This is expanded by ExpandPostRAPseudos.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in {
  def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
                          [(set FR32X:$dst, fp32imm0)]>;
  def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
                          [(set FR64X:$dst, fp64imm0)]>;
  def AVX512_FsFLD0F128 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
                            [(set VR128X:$dst, fp128imm0)]>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - VECTOR INSERT
//

// Supports two different pattern operators for mask and unmasked ops. Allows
// null_frag to be passed for one.
multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From,
                                  X86VectorVTInfo To,
                                  SDPatternOperator vinsert_insert,
                                  SDPatternOperator vinsert_for_mask,
                                  X86FoldableSchedWrite sched> {
  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    defm rr : AVX512_maskable_split<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
                   (ins To.RC:$src1, From.RC:$src2, u8imm:$src3),
                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
                   "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
                                         (From.VT From.RC:$src2),
                                         (iPTR imm)),
                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
                                           (From.VT From.RC:$src2),
                                           (iPTR imm))>,
                   AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
    let mayLoad = 1 in
    defm rm : AVX512_maskable_split<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
                   (ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3),
                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
                   "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
                                         (From.VT (From.LdFrag addr:$src2)),
                                         (iPTR imm)),
                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
                                           (From.VT (From.LdFrag addr:$src2)),
                                           (iPTR imm))>, AVX512AIi8Base, EVEX_4V,
                   EVEX_CD8<From.EltSize, From.CD8TupleForm>,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Passes the same pattern operator for masked and unmasked ops.
multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From,
                            X86VectorVTInfo To,
                            SDPatternOperator vinsert_insert,
                            X86FoldableSchedWrite sched> :
  vinsert_for_size_split<Opcode, From, To, vinsert_insert, vinsert_insert, sched>;

multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                                     X86VectorVTInfo To, PatFrag vinsert_insert,
                                     SDNodeXForm INSERT_get_vinsert_imm,
                                     list<Predicate> p> {
  let Predicates = p in {
    def : Pat<(vinsert_insert:$ins
                   (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rr")
                     To.RC:$src1, From.RC:$src2,
                     (INSERT_get_vinsert_imm To.RC:$ins)))>;

    def : Pat<(vinsert_insert:$ins
                   (To.VT To.RC:$src1),
                   (From.VT (From.LdFrag addr:$src2)),
                   (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rm")
                     To.RC:$src1, addr:$src2,
                     (INSERT_get_vinsert_imm To.RC:$ins)))>;
  }
}

multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
                            ValueType EltVT64, int Opcode256,
                            X86FoldableSchedWrite sched> {

  let Predicates = [HasVLX] in
    defm NAME # "32x4Z256" : vinsert_for_size<Opcode128,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 vinsert128_insert, sched>, EVEX_V256;

  defm NAME # "32x4Z" : vinsert_for_size<Opcode128,
                            X86VectorVTInfo< 4, EltVT32, VR128X>,
                            X86VectorVTInfo<16, EltVT32, VR512>,
                            vinsert128_insert, sched>, EVEX_V512;

  defm NAME # "64x4Z" : vinsert_for_size<Opcode256,
                            X86VectorVTInfo< 4, EltVT64, VR256X>,
                            X86VectorVTInfo< 8, EltVT64, VR512>,
                            vinsert256_insert, sched>, VEX_W, EVEX_V512;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasVLX, HasDQI] in
    defm NAME # "64x2Z256" : vinsert_for_size_split<Opcode128,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 null_frag, vinsert128_insert, sched>,
                                 VEX_W1X, EVEX_V256;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z" : vinsert_for_size_split<Opcode128,
                              X86VectorVTInfo< 2, EltVT64, VR128X>,
                              X86VectorVTInfo< 8, EltVT64, VR512>,
                              null_frag, vinsert128_insert, sched>,
                              VEX_W, EVEX_V512;

    defm NAME # "32x8Z" : vinsert_for_size_split<Opcode256,
                              X86VectorVTInfo< 8, EltVT32, VR256X>,
                              X86VectorVTInfo<16, EltVT32, VR512>,
                              null_frag, vinsert256_insert, sched>,
                              EVEX_V512;
  }
}

// FIXME: Is there a better scheduler class for VINSERTF/VINSERTI?
defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a, WriteFShuffle256>;
defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a, WriteShuffle256>;

// Codegen patterns with the alternative types.
// Even with AVX512DQ we'll still use these for unmasked operations.
defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;

defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;

defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;

// Codegen patterns with the alternative types: insert VEC128 into VEC256.
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
// Codegen patterns with the alternative types: insert VEC128 into VEC512.
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
// Codegen patterns with the alternative types: insert VEC256 into VEC512.
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;


multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                 X86VectorVTInfo To, X86VectorVTInfo Cast,
                                 PatFrag vinsert_insert,
                                 SDNodeXForm INSERT_get_vinsert_imm,
                                 list<Predicate> p> {
let Predicates = p in {
  def : Pat<(Cast.VT
             (vselect Cast.KRCWM:$mask,
                      (bitconvert
                       (vinsert_insert:$ins (To.VT To.RC:$src1),
                                            (From.VT From.RC:$src2),
                                            (iPTR imm))),
                      Cast.RC:$src0)),
            (!cast<Instruction>(InstrStr#"rrk")
             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
  def : Pat<(Cast.VT
             (vselect Cast.KRCWM:$mask,
                      (bitconvert
                       (vinsert_insert:$ins (To.VT To.RC:$src1),
                                            (From.VT
                                             (bitconvert
                                              (From.LdFrag addr:$src2))),
                                            (iPTR imm))),
                      Cast.RC:$src0)),
            (!cast<Instruction>(InstrStr#"rmk")
             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;

  def : Pat<(Cast.VT
             (vselect Cast.KRCWM:$mask,
                      (bitconvert
                       (vinsert_insert:$ins (To.VT To.RC:$src1),
                                            (From.VT From.RC:$src2),
                                            (iPTR imm))),
                      Cast.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#"rrkz")
             Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
  def : Pat<(Cast.VT
             (vselect Cast.KRCWM:$mask,
                      (bitconvert
                       (vinsert_insert:$ins (To.VT To.RC:$src1),
                                            (From.VT (From.LdFrag addr:$src2)),
                                            (iPTR imm))),
                      Cast.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#"rmkz")
             Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
}
}

defm : vinsert_for_mask_cast<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
                             v8f32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4f32x_info, v8f32x_info,
                             v4f64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;

defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4i32x_info, v8i32x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v8i16x_info, v16i16x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v16i8x_info, v32i8x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;

defm : vinsert_for_mask_cast<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
                             v16f32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z", v4f32x_info, v16f32_info,
                             v8f64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;

defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v4i32x_info, v16i32_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v8i16x_info, v32i16_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v16i8x_info, v64i8_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;

defm : vinsert_for_mask_cast<"VINSERTF32x8Z", v4f64x_info, v8f64_info,
                             v16f32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
                             v8f64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;

defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v4i64x_info, v8i64_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v16i16x_info, v32i16_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v32i8x_info, v64i8_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;

// vinsertps - insert f32 to XMM
let ExeDomain = SSEPackedSingle in {
let isCommutable = 1 in
def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
      (ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, timm:$src3))]>,
      EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
      (ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst, (X86insertps VR128X:$src1,
                         (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
                         timm:$src3))]>,
      EVEX_4V, EVEX_CD8<32, CD8VT1>,
      Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
}
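// A usage note (per the Intel SDM, not encoded here): the vinsertps immediate
// packs three fields - bits [7:6] select the source element of $src2,
// bits [5:4] the destination slot in $src1, and bits [3:0] a zero mask.
// So "vinsertps $0x10, %xmm2, %xmm1, %xmm0" copies element 0 of %xmm2 into
// slot 1 of %xmm1.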

//===----------------------------------------------------------------------===//
// AVX-512 VECTOR EXTRACT
//---

// Supports two different pattern operators for mask and unmasked ops. Allows
// null_frag to be passed for one.
multiclass vextract_for_size_split<int Opcode,
                                   X86VectorVTInfo From, X86VectorVTInfo To,
                                   SDPatternOperator vextract_extract,
                                   SDPatternOperator vextract_for_mask,
                                   SchedWrite SchedRR, SchedWrite SchedMR> {

  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    defm rr : AVX512_maskable_split<Opcode, MRMDestReg, To, (outs To.RC:$dst),
                (ins From.RC:$src1, u8imm:$idx),
                "vextract" # To.EltTypeName # "x" # To.NumElts,
                "$idx, $src1", "$src1, $idx",
                (vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm)),
                (vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm))>,
                AVX512AIi8Base, EVEX, Sched<[SchedRR]>;

    def mr : AVX512AIi8<Opcode, MRMDestMem, (outs),
                    (ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx),
                    "vextract" # To.EltTypeName # "x" # To.NumElts #
                        "\t{$idx, $src1, $dst|$dst, $src1, $idx}",
                    [(store (To.VT (vextract_extract:$idx
                                    (From.VT From.RC:$src1), (iPTR imm))),
                            addr:$dst)]>, EVEX,
                    Sched<[SchedMR]>;

    let mayStore = 1, hasSideEffects = 0 in
    def mrk : AVX512AIi8<Opcode, MRMDestMem, (outs),
                     (ins To.MemOp:$dst, To.KRCWM:$mask,
                          From.RC:$src1, u8imm:$idx),
                     "vextract" # To.EltTypeName # "x" # To.NumElts #
                          "\t{$idx, $src1, $dst {${mask}}|"
                          "$dst {${mask}}, $src1, $idx}", []>,
                     EVEX_K, EVEX, Sched<[SchedMR]>, NotMemoryFoldable;
  }
}

// Passes the same pattern operator for masked and unmasked ops.
multiclass vextract_for_size<int Opcode, X86VectorVTInfo From,
                             X86VectorVTInfo To,
                             SDPatternOperator vextract_extract,
                             SchedWrite SchedRR, SchedWrite SchedMR> :
  vextract_for_size_split<Opcode, From, To, vextract_extract, vextract_extract, SchedRR, SchedMR>;

// Codegen patterns for the alternative types.
multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                X86VectorVTInfo To, PatFrag vextract_extract,
                SDNodeXForm EXTRACT_get_vextract_imm, list<Predicate> p> {
  let Predicates = p in {
    def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rr")
                        From.RC:$src1,
                        (EXTRACT_get_vextract_imm To.RC:$ext)))>;
    def : Pat<(store (To.VT (vextract_extract:$ext (From.VT From.RC:$src1),
                             (iPTR imm))), addr:$dst),
              (!cast<Instruction>(InstrStr#"mr") addr:$dst, From.RC:$src1,
               (EXTRACT_get_vextract_imm To.RC:$ext))>;
  }
}

multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
                             ValueType EltVT64, int Opcode256,
                             SchedWrite SchedRR, SchedWrite SchedMR> {
  let Predicates = [HasAVX512] in {
    defm NAME # "32x4Z" : vextract_for_size<Opcode128,
                              X86VectorVTInfo<16, EltVT32, VR512>,
                              X86VectorVTInfo< 4, EltVT32, VR128X>,
                              vextract128_extract, SchedRR, SchedMR>,
                              EVEX_V512, EVEX_CD8<32, CD8VT4>;
    defm NAME # "64x4Z" : vextract_for_size<Opcode256,
                              X86VectorVTInfo< 8, EltVT64, VR512>,
                              X86VectorVTInfo< 4, EltVT64, VR256X>,
                              vextract256_extract, SchedRR, SchedMR>,
                              VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
  }
  let Predicates = [HasVLX] in
    defm NAME # "32x4Z256" : vextract_for_size<Opcode128,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 vextract128_extract, SchedRR, SchedMR>,
                                 EVEX_V256, EVEX_CD8<32, CD8VT4>;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasVLX, HasDQI] in
    defm NAME # "64x2Z256" : vextract_for_size_split<Opcode128,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 null_frag, vextract128_extract, SchedRR, SchedMR>,
                                 VEX_W1X, EVEX_V256, EVEX_CD8<64, CD8VT2>;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z" : vextract_for_size_split<Opcode128,
                              X86VectorVTInfo< 8, EltVT64, VR512>,
                              X86VectorVTInfo< 2, EltVT64, VR128X>,
                              null_frag, vextract128_extract, SchedRR, SchedMR>,
                              VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
    defm NAME # "32x8Z" : vextract_for_size_split<Opcode256,
                              X86VectorVTInfo<16, EltVT32, VR512>,
                              X86VectorVTInfo< 8, EltVT32, VR256X>,
                              null_frag, vextract256_extract, SchedRR, SchedMR>,
                              EVEX_V512, EVEX_CD8<32, CD8VT8>;
  }
}

// TODO - replace WriteFStore/WriteVecStore with X86SchedWriteMoveLSWidths types.
defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b, WriteFShuffle256, WriteFStore>;
defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b, WriteShuffle256, WriteVecStore>;

// extract_subvector codegen patterns with the alternative types.
// Even with AVX512DQ we'll still use these for unmasked operations.
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;

defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;

defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;

// Codegen patterns with the alternative types: extract VEC128 from VEC256.
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;

// Codegen patterns with the alternative types: extract VEC128 from VEC512.
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
// Codegen patterns with the alternative types: extract VEC256 from VEC512.
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;

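// To illustrate the lowering patterns above (derived from them, not a new
// rule): (v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 16))) - the
// third 128-bit lane of a zmm - selects VEXTRACTI32x4Zrr with immediate 2,
// as computed by EXTRACT_get_vextract128_imm.
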
// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
let Predicates = [NoVLX] in {
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
          (v2i64 (VEXTRACTI128rr
                  (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
          (v2f64 (VEXTRACTF128rr
                  (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
          (v4i32 (VEXTRACTI128rr
                  (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
          (v4f32 (VEXTRACTF128rr
                  (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
          (v8i16 (VEXTRACTI128rr
                  (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
          (v16i8 (VEXTRACTI128rr
                  (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
}

// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
let Predicates = [HasVLX] in {
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
          (v2i64 (VEXTRACTI32x4Z256rr
                  (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
          (v2f64 (VEXTRACTF32x4Z256rr
                  (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
          (v4i32 (VEXTRACTI32x4Z256rr
                  (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
          (v4f32 (VEXTRACTF32x4Z256rr
                  (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
          (v8i16 (VEXTRACTI32x4Z256rr
                  (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
          (v16i8 (VEXTRACTI32x4Z256rr
                  (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
}

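// The payoff (an illustrative sketch, not encoded in the patterns): once the
// operation only touches the low 256 bits and no mask is involved, the
// EVEX-to-VEX compression pass can shrink e.g.
//   vextracti32x4 $1, %ymm0, %xmm0   (EVEX)
// to
//   vextracti128  $1, %ymm0, %xmm0   (VEX)
// saving a byte of encoding.
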
// Additional patterns for handling a bitcast between the vselect and the
// extract_subvector.
multiclass vextract_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                  X86VectorVTInfo To, X86VectorVTInfo Cast,
                                  PatFrag vextract_extract,
                                  SDNodeXForm EXTRACT_get_vextract_imm,
                                  list<Predicate> p> {
let Predicates = p in {
  def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
                              (bitconvert
                               (To.VT (vextract_extract:$ext
                                       (From.VT From.RC:$src), (iPTR imm)))),
                              To.RC:$src0)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
                      Cast.RC:$src0, Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;

  def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
                              (bitconvert
                               (To.VT (vextract_extract:$ext
                                       (From.VT From.RC:$src), (iPTR imm)))),
                              Cast.ImmAllZerosV)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
                      Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;
}
}

defm : vextract_for_mask_cast<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
                              v4f32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z256", v8f32x_info, v4f32x_info,
                              v2f64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;

defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v8i32x_info, v4i32x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v16i16x_info, v8i16x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v32i8x_info, v16i8x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;

defm : vextract_for_mask_cast<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
                              v4f32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z", v16f32_info, v4f32x_info,
                              v2f64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;

defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v16i32_info, v4i32x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v32i16_info, v8i16x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v64i8_info, v16i8x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;

defm : vextract_for_mask_cast<"VEXTRACTF32x8Z", v8f64_info, v4f64x_info,
                              v8f32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
                              v4f64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;

defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v8i64_info, v4i64x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v32i16_info, v16i16x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v64i8_info, v32i8x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;

// vextractps - extract 32 bits from XMM
def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
                    (ins VR128X:$src1, u8imm:$src2),
                    "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
                    EVEX, VEX_WIG, Sched<[WriteVecExtract]>;

def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
                    (ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
                    "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
                            addr:$dst)]>,
                    EVEX, VEX_WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecExtractSt]>;

//===---------------------------------------------------------------------===//
// AVX-512 BROADCAST
//---
// Broadcast with a scalar argument.
multiclass avx512_broadcast_scalar<bits<8> opc, string OpcodeStr,
                                   string Name,
                                   X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo> {
  def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#r)
             (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
  def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
                                  (X86VBroadcast SrcInfo.FRC:$src),
                                  DestInfo.RC:$src0)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#rk)
             DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
             (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
  def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
                                  (X86VBroadcast SrcInfo.FRC:$src),
                                  DestInfo.ImmAllZerosV)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#rkz)
             DestInfo.KRCWM:$mask,
             (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
}

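// As a worked example of the patterns above (an illustration, not an extra
// rule): broadcasting an f32 value held in FR32X to v16f32, i.e.
// (v16f32 (X86VBroadcast FR32X:$src)), selects VBROADCASTSSZr after a
// COPY_TO_REGCLASS of $src into VR128X, since the instruction reads its
// scalar from an XMM register.
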
// Split version to allow mask and broadcast node to be different types. This
// helps support the 32x2 broadcasts.
multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
                                     string Name,
                                     SchedWrite SchedRR, SchedWrite SchedRM,
                                     X86VectorVTInfo MaskInfo,
                                     X86VectorVTInfo DestInfo,
                                     X86VectorVTInfo SrcInfo,
                                     bit IsConvertibleToThreeAddress,
                                     SDPatternOperator UnmaskedOp = X86VBroadcast,
                                     SDPatternOperator UnmaskedBcastOp = SrcInfo.BroadcastLdFrag> {
  let hasSideEffects = 0 in
  def r : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst), (ins SrcInfo.RC:$src),
                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                   [(set MaskInfo.RC:$dst,
                     (MaskInfo.VT
                      (bitconvert
                       (DestInfo.VT
                        (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))))],
                   DestInfo.ExeDomain>, T8PD, EVEX, Sched<[SchedRR]>;
  def rkz : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
                     (ins MaskInfo.KRCWM:$mask, SrcInfo.RC:$src),
                     !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
                                "${dst} {${mask}} {z}, $src}"),
                     [(set MaskInfo.RC:$dst,
                       (vselect MaskInfo.KRCWM:$mask,
                        (MaskInfo.VT
                         (bitconvert
                          (DestInfo.VT
                           (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
                        MaskInfo.ImmAllZerosV))],
                     DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ, Sched<[SchedRR]>;
  let Constraints = "$src0 = $dst" in
  def rk : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
                    (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
                         SrcInfo.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
                               "${dst} {${mask}}, $src}"),
                    [(set MaskInfo.RC:$dst,
                      (vselect MaskInfo.KRCWM:$mask,
                       (MaskInfo.VT
                        (bitconvert
                         (DestInfo.VT
                          (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
                       MaskInfo.RC:$src0))],
                    DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K, Sched<[SchedRR]>;

  let hasSideEffects = 0, mayLoad = 1 in
  def m : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
                   (ins SrcInfo.ScalarMemOp:$src),
                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                   [(set MaskInfo.RC:$dst,
                     (MaskInfo.VT
                      (bitconvert
                       (DestInfo.VT
                        (UnmaskedBcastOp addr:$src)))))],
                   DestInfo.ExeDomain>, T8PD, EVEX,
                   EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;

  def mkz : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
                     (ins MaskInfo.KRCWM:$mask, SrcInfo.ScalarMemOp:$src),
                     !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
                                "${dst} {${mask}} {z}, $src}"),
                     [(set MaskInfo.RC:$dst,
                       (vselect MaskInfo.KRCWM:$mask,
                        (MaskInfo.VT
                         (bitconvert
                          (DestInfo.VT
                           (SrcInfo.BroadcastLdFrag addr:$src)))),
                        MaskInfo.ImmAllZerosV))],
                     DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ,
                     EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;

  let Constraints = "$src0 = $dst",
      isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
  def mk : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
                    (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
                         SrcInfo.ScalarMemOp:$src),
                    !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
                               "${dst} {${mask}}, $src}"),
                    [(set MaskInfo.RC:$dst,
                      (vselect MaskInfo.KRCWM:$mask,
                       (MaskInfo.VT
                        (bitconvert
                         (DestInfo.VT
                          (SrcInfo.BroadcastLdFrag addr:$src)))),
                       MaskInfo.RC:$src0))],
                    DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K,
                    EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
}

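// For orientation (a summary of the records defined above, nothing new): each
// instantiation yields six instructions - r/rk/rkz for the register source
// and m/mk/mkz for the memory source - e.g. for VPBROADCASTDZ:
//   vpbroadcastd %xmm1, %zmm0
//   vpbroadcastd %xmm1, %zmm0 {%k1}
//   vpbroadcastd %xmm1, %zmm0 {%k1} {z}
//   vpbroadcastd (%rdi), %zmm0           (plus its {%k1} and {%k1} {z} forms)
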
// Helper class to force mask and broadcast result to same type.
multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr, string Name,
                               SchedWrite SchedRR, SchedWrite SchedRM,
                               X86VectorVTInfo DestInfo,
                               X86VectorVTInfo SrcInfo,
                               bit IsConvertibleToThreeAddress> :
  avx512_broadcast_rm_split<opc, OpcodeStr, Name, SchedRR, SchedRM,
                            DestInfo, DestInfo, SrcInfo,
                            IsConvertibleToThreeAddress>;

multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                 WriteFShuffle256Ld, _.info512, _.info128, 1>,
             avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512,
                                     _.info128>,
             EVEX_V512;
  }

  let Predicates = [HasVLX] in {
    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                    WriteFShuffle256Ld, _.info256, _.info128, 1>,
                avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256,
                                        _.info128>,
                EVEX_V256;
  }
}

multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                 WriteFShuffle256Ld, _.info512, _.info128, 1>,
             avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512,
                                     _.info128>,
             EVEX_V512;
  }

  let Predicates = [HasVLX] in {
    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                    WriteFShuffle256Ld, _.info256, _.info128, 1>,
                avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256,
                                        _.info128>,
                EVEX_V256;
    defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                    WriteFShuffle256Ld, _.info128, _.info128, 1>,
                avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info128,
                                        _.info128>,
                EVEX_V128;
  }
}
defm VBROADCASTSS : avx512_fp_broadcast_ss<0x18, "vbroadcastss",
                                           avx512vl_f32_info>;
defm VBROADCASTSD : avx512_fp_broadcast_sd<0x19, "vbroadcastsd",
                                           avx512vl_f64_info>, VEX_W1X;

multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR,
                                    X86VectorVTInfo _, SDPatternOperator OpNode,
                                    RegisterClass SrcRC> {
  let ExeDomain = _.ExeDomain in
  defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins SrcRC:$src),
                           "vpbroadcast"##_.Suffix, "$src", "$src",
                           (_.VT (OpNode SrcRC:$src))>, T8PD, EVEX,
                           Sched<[SchedRR]>;
}

multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite SchedRR,
                                      X86VectorVTInfo _, SDPatternOperator OpNode,
                                      RegisterClass SrcRC, SubRegIndex Subreg> {
  let hasSideEffects = 0, ExeDomain = _.ExeDomain in
  defm r : AVX512_maskable_custom<opc, MRMSrcReg,
                                  (outs _.RC:$dst), (ins GR32:$src),
                                  !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
                                  !con((ins _.KRCWM:$mask), (ins GR32:$src)),
                                  "vpbroadcast"##_.Suffix, "$src", "$src", [], [], [],
                                  "$src0 = $dst">, T8PD, EVEX, Sched<[SchedRR]>;

  def : Pat <(_.VT (OpNode SrcRC:$src)),
             (!cast<Instruction>(Name#r)
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;

  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
             (!cast<Instruction>(Name#rk) _.RC:$src0, _.KRCWM:$mask,
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;

  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV),
             (!cast<Instruction>(Name#rkz) _.KRCWM:$mask,
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
}

multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name,
                                         AVX512VLVectorVTInfo _, SDPatternOperator OpNode,
                                         RegisterClass SrcRC, SubRegIndex Subreg,
                                         Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, WriteShuffle256, _.info512,
                                        OpNode, SrcRC, Subreg>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, WriteShuffle256,
                                           _.info256, OpNode, SrcRC, Subreg>, EVEX_V256;
    defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, WriteShuffle,
                                           _.info128, OpNode, SrcRC, Subreg>, EVEX_V128;
  }
}

multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
                                       SDPatternOperator OpNode,
                                       RegisterClass SrcRC, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info512, OpNode,
                                      SrcRC>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info256, OpNode,
                                         SrcRC>, EVEX_V256;
    defm Z128 : avx512_int_broadcast_reg<opc, WriteShuffle, _.info128, OpNode,
                                         SrcRC>, EVEX_V128;
  }
}

defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr",
                       avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>;
defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr",
                       avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit,
                       HasBWI>;
defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
                                                 X86VBroadcast, GR32, HasAVX512>;
defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
                                                 X86VBroadcast, GR64, HasAVX512>, VEX_W;

multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
                                      AVX512VLVectorVTInfo _, Predicate prd,
                                      bit IsConvertibleToThreeAddress> {
  let Predicates = [prd] in {
    defm Z : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256,
                                 WriteShuffle256Ld, _.info512, _.info128,
                                 IsConvertibleToThreeAddress>,
             EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256,
                                    WriteShuffle256Ld, _.info256, _.info128,
                                    IsConvertibleToThreeAddress>,
                EVEX_V256;
    defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle,
                                    WriteShuffleXLd, _.info128, _.info128,
                                    IsConvertibleToThreeAddress>,
                EVEX_V128;
  }
}

defm VPBROADCASTB : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
                                               avx512vl_i8_info, HasBWI, 0>;
defm VPBROADCASTW : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
                                               avx512vl_i16_info, HasBWI, 0>;
defm VPBROADCASTD : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
                                               avx512vl_i32_info, HasAVX512, 1>;
defm VPBROADCASTQ : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
                                               avx512vl_i64_info, HasAVX512, 1>, VEX_W1X;

multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
                                      X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
                            (_Dst.VT (X86SubVBroadcast
                                      (_Src.VT (_Src.LdFrag addr:$src))))>,
                            Sched<[SchedWriteShuffle.YMM.Folded]>,
                            AVX5128IBase, EVEX;
}

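// For example (restating the dag above in asm terms): instantiated as
// VBROADCASTI32X4 further below, the rm record matches a 128-bit load whose
// value is repeated across all four 128-bit lanes of a zmm, i.e.
// "vbroadcasti32x4 (%rdi), %zmm0".
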
multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name,
                                         AVX512VLVectorVTInfo _, SDPatternOperator OpNode,
                                         RegisterClass SrcRC, SubRegIndex Subreg,
                                         Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, WriteShuffle256, _.info512,
                                        OpNode, SrcRC, Subreg>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, WriteShuffle256,
                                           _.info256, OpNode, SrcRC, Subreg>, EVEX_V256;
    defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, WriteShuffle,
                                           _.info128, OpNode, SrcRC, Subreg>, EVEX_V128;
  }
}

multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
                                       SDPatternOperator OpNode,
                                       RegisterClass SrcRC, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info512, OpNode,
                                      SrcRC>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info256, OpNode,
                                         SrcRC>, EVEX_V256;
    defm Z128 : avx512_int_broadcast_reg<opc, WriteShuffle, _.info128, OpNode,
                                         SrcRC>, EVEX_V128;
  }
}

defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr",
                       avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>;
defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr",
                       avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit,
                       HasBWI>;
defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
                                                 X86VBroadcast, GR32, HasAVX512>;
defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
                                                 X86VBroadcast, GR64, HasAVX512>, VEX_W;

multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
                                      AVX512VLVectorVTInfo _, Predicate prd,
                                      bit IsConvertibleToThreeAddress> {
  let Predicates = [prd] in {
    defm Z : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256,
                                 WriteShuffle256Ld, _.info512, _.info128,
                                 IsConvertibleToThreeAddress>,
             EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256,
                                    WriteShuffle256Ld, _.info256, _.info128,
                                    IsConvertibleToThreeAddress>,
                EVEX_V256;
    defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle,
                                    WriteShuffleXLd, _.info128, _.info128,
                                    IsConvertibleToThreeAddress>,
                EVEX_V128;
  }
}

defm VPBROADCASTB : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
                                               avx512vl_i8_info, HasBWI, 0>;
defm VPBROADCASTW : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
                                               avx512vl_i16_info, HasBWI, 0>;
defm VPBROADCASTD : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
                                               avx512vl_i32_info, HasAVX512, 1>;
defm VPBROADCASTQ : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
                                               avx512vl_i64_info, HasAVX512, 1>, VEX_W1X;

multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
                                      X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
                            (_Dst.VT (X86SubVBroadcast
                              (_Src.VT (_Src.LdFrag addr:$src))))>,
                            Sched<[SchedWriteShuffle.YMM.Folded]>,
                            AVX5128IBase, EVEX;
}

// This should be used for the AVX512DQ broadcast instructions. It disables
// the unmasked patterns so that we only use the DQ instructions when masking
// is requested.
multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
                                         X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
  let hasSideEffects = 0, mayLoad = 1 in
  defm rm : AVX512_maskable_split<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                                  (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
                                  (null_frag),
                                  (_Dst.VT (X86SubVBroadcast
                                    (_Src.VT (_Src.LdFrag addr:$src))))>,
                                  Sched<[SchedWriteShuffle.YMM.Folded]>,
                                  AVX5128IBase, EVEX;
}
let Predicates = [HasAVX512] in {
  // 32-bit targets will fail to load an i64 directly but can use ZEXT_LOAD.
  def : Pat<(v8i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
            (VPBROADCASTQZm addr:$src)>;

  // FIXME this is to handle aligned extloads from i8.
  def : Pat<(v16i32 (X86VBroadcast (loadi32 addr:$src))),
            (VPBROADCASTDZm addr:$src)>;
}

let Predicates = [HasVLX] in {
  // 32-bit targets will fail to load an i64 directly but can use ZEXT_LOAD.
  def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
            (VPBROADCASTQZ128m addr:$src)>;
  def : Pat<(v4i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
            (VPBROADCASTQZ256m addr:$src)>;

  // FIXME this is to handle aligned extloads from i8.
  def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
            (VPBROADCASTDZ128m addr:$src)>;
  def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))),
            (VPBROADCASTDZ256m addr:$src)>;
}
let Predicates = [HasVLX, HasBWI] in {
  // loadi16 is tricky to fold, because !isTypeDesirableForOp rejects i16
  // loads, justifiably. This means we'll encounter truncated i32 loads;
  // match that here.
  def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
            (VPBROADCASTWZ128m addr:$src)>;
  def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
            (VPBROADCASTWZ256m addr:$src)>;
  def : Pat<(v8i16 (X86VBroadcast
                    (i16 (trunc (i32 (extloadi16 addr:$src)))))),
            (VPBROADCASTWZ128m addr:$src)>;
  def : Pat<(v8i16 (X86VBroadcast
                    (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
            (VPBROADCASTWZ128m addr:$src)>;
  def : Pat<(v16i16 (X86VBroadcast
                     (i16 (trunc (i32 (extloadi16 addr:$src)))))),
            (VPBROADCASTWZ256m addr:$src)>;
  def : Pat<(v16i16 (X86VBroadcast
                     (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
            (VPBROADCASTWZ256m addr:$src)>;

  // FIXME this is to handle aligned extloads from i8.
  def : Pat<(v8i16 (X86VBroadcast (loadi16 addr:$src))),
            (VPBROADCASTWZ128m addr:$src)>;
  def : Pat<(v16i16 (X86VBroadcast (loadi16 addr:$src))),
            (VPBROADCASTWZ256m addr:$src)>;
}
let Predicates = [HasBWI] in {
  // loadi16 is tricky to fold, because !isTypeDesirableForOp rejects i16
  // loads, justifiably. This means we'll encounter truncated i32 loads;
  // match that here.
  def : Pat<(v32i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
            (VPBROADCASTWZm addr:$src)>;
  def : Pat<(v32i16 (X86VBroadcast
                     (i16 (trunc (i32 (extloadi16 addr:$src)))))),
            (VPBROADCASTWZm addr:$src)>;
  def : Pat<(v32i16 (X86VBroadcast
                     (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
            (VPBROADCASTWZm addr:$src)>;

  // FIXME this is to handle aligned extloads from i8.
  def : Pat<(v32i16 (X86VBroadcast (loadi16 addr:$src))),
            (VPBROADCASTWZm addr:$src)>;
}
//===----------------------------------------------------------------------===//
// AVX-512 BROADCAST SUBVECTORS
//

defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
                                                  v16i32_info, v4i32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
                                                  v16f32_info, v4f32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
                                                  v8i64_info, v4i64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT4>;
defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
                                                  v8f64_info, v4f64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT4>;

let Predicates = [HasAVX512] in {
def : Pat<(v16f32 (X86SubVBroadcast (loadv8f32 addr:$src))),
          (VBROADCASTF64X4rm addr:$src)>;
def : Pat<(v16i32 (X86SubVBroadcast (loadv8i32 addr:$src))),
          (VBROADCASTI64X4rm addr:$src)>;
def : Pat<(v32i16 (X86SubVBroadcast (loadv16i16 addr:$src))),
          (VBROADCASTI64X4rm addr:$src)>;
def : Pat<(v64i8 (X86SubVBroadcast (loadv32i8 addr:$src))),
          (VBROADCASTI64X4rm addr:$src)>;

// Provide a fallback in case the load node used in the patterns above has
// additional users, which prevents pattern selection.
def : Pat<(v8f64 (X86SubVBroadcast (v4f64 VR256X:$src))),
          (VINSERTF64x4Zrr (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v4f64 VR256X:$src), 1)>;
def : Pat<(v16f32 (X86SubVBroadcast (v8f32 VR256X:$src))),
          (VINSERTF64x4Zrr (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v8f32 VR256X:$src), 1)>;
def : Pat<(v8i64 (X86SubVBroadcast (v4i64 VR256X:$src))),
          (VINSERTI64x4Zrr (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v4i64 VR256X:$src), 1)>;
def : Pat<(v16i32 (X86SubVBroadcast (v8i32 VR256X:$src))),
          (VINSERTI64x4Zrr (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v8i32 VR256X:$src), 1)>;
def : Pat<(v32i16 (X86SubVBroadcast (v16i16 VR256X:$src))),
          (VINSERTI64x4Zrr (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v16i16 VR256X:$src), 1)>;
def : Pat<(v64i8 (X86SubVBroadcast (v32i8 VR256X:$src))),
          (VINSERTI64x4Zrr (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v32i8 VR256X:$src), 1)>;

def : Pat<(v8f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
          (VBROADCASTF32X4rm addr:$src)>;
def : Pat<(v8i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
          (VBROADCASTI32X4rm addr:$src)>;
def : Pat<(v32i16 (X86SubVBroadcast (loadv8i16 addr:$src))),
          (VBROADCASTI32X4rm addr:$src)>;
def : Pat<(v64i8 (X86SubVBroadcast (loadv16i8 addr:$src))),
          (VBROADCASTI32X4rm addr:$src)>;
// Patterns for selects of bitcasted operations.
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
                   (v16f32 immAllZerosV)),
          (VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTF32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
                   (v16i32 immAllZerosV)),
          (VBROADCASTI32X4rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTI32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;

def : Pat<(vselect VK8WM:$mask,
                   (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
                   (v8f64 immAllZerosV)),
          (VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv8i32 addr:$src)))),
                   (v8i64 immAllZerosV)),
          (VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv8i32 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}
let Predicates = [HasVLX] in {
defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
                                                      v8i32x_info, v4i32x_info>,
                           EVEX_V256, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
                                                      v8f32x_info, v4f32x_info>,
                           EVEX_V256, EVEX_CD8<32, CD8VT4>;

def : Pat<(v4f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
          (VBROADCASTF32X4Z256rm addr:$src)>;
def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
          (VBROADCASTI32X4Z256rm addr:$src)>;
def : Pat<(v16i16 (X86SubVBroadcast (loadv8i16 addr:$src))),
          (VBROADCASTI32X4Z256rm addr:$src)>;
def : Pat<(v32i8 (X86SubVBroadcast (loadv16i8 addr:$src))),
          (VBROADCASTI32X4Z256rm addr:$src)>;

// Patterns for selects of bitcasted operations.
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
                   (v8f32 immAllZerosV)),
          (VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
                   VR256X:$src0),
          (VBROADCASTF32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
                   (v8i32 immAllZerosV)),
          (VBROADCASTI32X4Z256rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
                   VR256X:$src0),
          (VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;

// Provide a fallback in case the load node used in the patterns above has
// additional users, which prevents pattern selection.
def : Pat<(v4f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
          (VINSERTF32x4Z256rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v2f64 VR128X:$src), 1)>;
def : Pat<(v8f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
          (VINSERTF32x4Z256rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v4f32 VR128X:$src), 1)>;
def : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
          (VINSERTI32x4Z256rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v2i64 VR128X:$src), 1)>;
def : Pat<(v8i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
          (VINSERTI32x4Z256rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v4i32 VR128X:$src), 1)>;
def : Pat<(v16i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
          (VINSERTI32x4Z256rr (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v8i16 VR128X:$src), 1)>;
def : Pat<(v32i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
          (VINSERTI32x4Z256rr (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v16i8 VR128X:$src), 1)>;
}
let Predicates = [HasVLX, HasDQI] in {
defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
                                                         v4i64x_info, v2i64x_info>, VEX_W1X,
                           EVEX_V256, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
                                                         v4f64x_info, v2f64x_info>, VEX_W1X,
                           EVEX_V256, EVEX_CD8<64, CD8VT2>;

// Patterns for selects of bitcasted operations.
def : Pat<(vselect VK4WM:$mask,
                   (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
                   (v4f64 immAllZerosV)),
          (VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>;
def : Pat<(vselect VK4WM:$mask,
                   (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
                   VR256X:$src0),
          (VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
def : Pat<(vselect VK4WM:$mask,
                   (bc_v4i64 (v8i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
                   (v4i64 immAllZerosV)),
          (VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>;
def : Pat<(vselect VK4WM:$mask,
                   (bc_v4i64 (v8i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
                   VR256X:$src0),
          (VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
}

let Predicates = [HasDQI] in {
defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
                                                     v8i64_info, v2i64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
                                                     v16i32_info, v8i32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT8>;
defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
                                                     v8f64_info, v2f64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
                                                     v16f32_info, v8f32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT8>;

// Patterns for selects of bitcasted operations.
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
                   (v16f32 immAllZerosV)),
          (VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTF32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))),
                   (v16i32 immAllZerosV)),
          (VBROADCASTI32X8rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTI32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;

def : Pat<(vselect VK8WM:$mask,
                   (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
                   (v8f64 immAllZerosV)),
          (VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
                   (v8i64 immAllZerosV)),
          (VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}
multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
                                        AVX512VLVectorVTInfo _Dst,
                                        AVX512VLVectorVTInfo _Src> {
  let Predicates = [HasDQI] in
    defm Z : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256,
                                       WriteShuffle256Ld, _Dst.info512,
                                       _Src.info512, _Src.info128, 0, null_frag, null_frag>,
             EVEX_V512;
  let Predicates = [HasDQI, HasVLX] in
    defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256,
                                          WriteShuffle256Ld, _Dst.info256,
                                          _Src.info256, _Src.info128, 0, null_frag, null_frag>,
                EVEX_V256;
}

multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
                                         AVX512VLVectorVTInfo _Dst,
                                         AVX512VLVectorVTInfo _Src> :
  avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> {

  let Predicates = [HasDQI, HasVLX] in
    defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle,
                                          WriteShuffleXLd, _Dst.info128,
                                          _Src.info128, _Src.info128, 0, null_frag, null_frag>,
                EVEX_V128;
}

defm VBROADCASTI32X2 : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
                                                     avx512vl_i32_info, avx512vl_i64_info>;
defm VBROADCASTF32X2 : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
                                                    avx512vl_f32_info, avx512vl_f64_info>;

//===----------------------------------------------------------------------===//
// AVX-512 BROADCAST MASK TO VECTOR REGISTER
//---
multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr,
                                  X86VectorVTInfo _, RegisterClass KRC> {
  def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.RC:$dst), (ins KRC:$src),
                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                      [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))]>,
                      EVEX, Sched<[WriteShuffle]>;
}

multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
                                 AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> {
  let Predicates = [HasCDI] in
    defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>, EVEX_V512;
  let Predicates = [HasCDI, HasVLX] in {
    defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>, EVEX_V256;
    defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>, EVEX_V128;
  }
}

defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
                                             avx512vl_i32_info, VK16>;
defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
                                             avx512vl_i64_info, VK8>, VEX_W;
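// VPBROADCASTMW2D/VPBROADCASTMB2Q (CDI) zero-extend the mask register value
// and splat it to every element; e.g. "vpbroadcastmw2d %k1, %zmm0" writes
// the 16-bit value of k1, zero-extended, into each of the 16 dword lanes.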
//===----------------------------------------------------------------------===//
// -- VPERMI2 - 3 source operands form --
multiclass avx512_perm_i<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched,
                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
    hasSideEffects = 0 in {
  defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst),
           (ins _.RC:$src2, _.RC:$src3),
           OpcodeStr, "$src3, $src2", "$src2, $src3",
           (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, _.RC:$src3)), 1>,
           EVEX_4V, AVX5128IBase, Sched<[sched]>;

  let mayLoad = 1 in
  defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
           (ins _.RC:$src2, _.MemOp:$src3),
           OpcodeStr, "$src3, $src2", "$src2, $src3",
           (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1,
                  (_.VT (_.LdFrag addr:$src3)))), 1>,
           EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched,
                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
      hasSideEffects = 0, mayLoad = 1 in
  defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
            (ins _.RC:$src2, _.ScalarMemOp:$src3),
            OpcodeStr, !strconcat("${src3}", _.BroadcastStr, ", $src2"),
            !strconcat("$src2, ${src3}", _.BroadcastStr),
            (_.VT (X86VPermt2 _.RC:$src2,
                   IdxVT.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
            AVX5128IBase, EVEX_4V, EVEX_B,
            Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr,
                               X86FoldableSchedWrite sched,
                               AVX512VLVectorVTInfo VTInfo,
                               AVX512VLVectorVTInfo ShuffleMask> {
  defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
                           ShuffleMask.info512>,
             avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info512,
                              ShuffleMask.info512>, EVEX_V512;
  let Predicates = [HasVLX] in {
    defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
                                 ShuffleMask.info128>,
                   avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info128,
                                    ShuffleMask.info128>, EVEX_V128;
    defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
                                 ShuffleMask.info256>,
                   avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info256,
                                    ShuffleMask.info256>, EVEX_V256;
  }
}

multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched,
                                  AVX512VLVectorVTInfo VTInfo,
                                  AVX512VLVectorVTInfo Idx,
                                  Predicate Prd> {
  let Predicates = [Prd] in
  defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
                           Idx.info512>, EVEX_V512;
  let Predicates = [Prd, HasVLX] in {
    defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
                                 Idx.info128>, EVEX_V128;
    defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
                                 Idx.info256>, EVEX_V256;
  }
}

defm VPERMI2D  : avx512_perm_i_sizes<0x76, "vpermi2d", WriteVarShuffle256,
                   avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMI2Q  : avx512_perm_i_sizes<0x76, "vpermi2q", WriteVarShuffle256,
                   avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMI2W  : avx512_perm_i_sizes_bw<0x75, "vpermi2w", WriteVarShuffle256,
                   avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
                 VEX_W, EVEX_CD8<16, CD8VF>;
defm VPERMI2B  : avx512_perm_i_sizes_bw<0x75, "vpermi2b", WriteVarShuffle256,
                   avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
                 EVEX_CD8<8, CD8VF>;
defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", WriteFVarShuffle256,
                   avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", WriteFVarShuffle256,
                   avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
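// Both VPERMI2* above and VPERMT2* further below tie $dst to $src1 and share
// the X86VPermt2 node; they differ in which input the tied operand supplies:
// VPERMI2* overwrites the index vector, while VPERMT2* overwrites the first
// table operand.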
// Extra patterns to deal with extra bitcasts due to passthru and index being
// different types on the fp versions.
multiclass avx512_perm_i_lowering<string InstrStr, X86VectorVTInfo _,
                                  X86VectorVTInfo IdxVT,
                                  X86VectorVTInfo CastVT> {
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (X86VPermt2 (_.VT _.RC:$src2),
                    (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))), _.RC:$src3),
                   (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
            (!cast<Instruction>(InstrStr#"rrk") _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, _.RC:$src3)>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (X86VPermt2 _.RC:$src2,
                    (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))),
                    (_.LdFrag addr:$src3)),
                   (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
            (!cast<Instruction>(InstrStr#"rmk") _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3)>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (X86VPermt2 _.RC:$src2,
                    (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))),
                    (_.BroadcastLdFrag addr:$src3)),
                   (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
            (!cast<Instruction>(InstrStr#"rmbk") _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3)>;
}

// TODO: Should we add more casts? The vXi64 case is common due to ABI.
defm : avx512_perm_i_lowering<"VPERMI2PS", v16f32_info, v16i32_info, v8i64_info>;
defm : avx512_perm_i_lowering<"VPERMI2PS256", v8f32x_info, v8i32x_info, v4i64x_info>;
defm : avx512_perm_i_lowering<"VPERMI2PS128", v4f32x_info, v4i32x_info, v2i64x_info>;
// VPERMT2
multiclass avx512_perm_t<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched,
                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
  defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
           (ins IdxVT.RC:$src2, _.RC:$src3),
           OpcodeStr, "$src3, $src2", "$src2, $src3",
           (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)), 1>,
           EVEX_4V, AVX5128IBase, Sched<[sched]>;

  defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
           (ins IdxVT.RC:$src2, _.MemOp:$src3),
           OpcodeStr, "$src3, $src2", "$src2, $src3",
           (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
                  (_.LdFrag addr:$src3))), 1>,
           EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched,
                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
  defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
            (ins IdxVT.RC:$src2, _.ScalarMemOp:$src3),
            OpcodeStr, !strconcat("${src3}", _.BroadcastStr, ", $src2"),
            !strconcat("$src2, ${src3}", _.BroadcastStr),
            (_.VT (X86VPermt2 _.RC:$src1,
                   IdxVT.RC:$src2, (_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
            AVX5128IBase, EVEX_4V, EVEX_B,
            Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr,
                               X86FoldableSchedWrite sched,
                               AVX512VLVectorVTInfo VTInfo,
                               AVX512VLVectorVTInfo ShuffleMask> {
  defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
                           ShuffleMask.info512>,
             avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info512,
                              ShuffleMask.info512>, EVEX_V512;
  let Predicates = [HasVLX] in {
    defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
                                 ShuffleMask.info128>,
                   avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info128,
                                    ShuffleMask.info128>, EVEX_V128;
    defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
                                 ShuffleMask.info256>,
                   avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info256,
                                    ShuffleMask.info256>, EVEX_V256;
  }
}

multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched,
                                  AVX512VLVectorVTInfo VTInfo,
                                  AVX512VLVectorVTInfo Idx, Predicate Prd> {
  let Predicates = [Prd] in
  defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
                           Idx.info512>, EVEX_V512;
  let Predicates = [Prd, HasVLX] in {
    defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
                                 Idx.info128>, EVEX_V128;
    defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
                                 Idx.info256>, EVEX_V256;
  }
}

defm VPERMT2D  : avx512_perm_t_sizes<0x7E, "vpermt2d", WriteVarShuffle256,
                   avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMT2Q  : avx512_perm_t_sizes<0x7E, "vpermt2q", WriteVarShuffle256,
                   avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMT2W  : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", WriteVarShuffle256,
                   avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
                 VEX_W, EVEX_CD8<16, CD8VF>;
defm VPERMT2B  : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", WriteVarShuffle256,
                   avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
                 EVEX_CD8<8, CD8VF>;
defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", WriteFVarShuffle256,
                   avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", WriteFVarShuffle256,
                   avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
//===----------------------------------------------------------------------===//
// AVX-512 - BLEND using mask
//

multiclass WriteFVarBlendask<bits<8> opc, string OpcodeStr,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
             (ins _.RC:$src1, _.RC:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), []>,
             EVEX_4V, Sched<[sched]>;
  def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
             (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
             []>, EVEX_4V, EVEX_K, Sched<[sched]>;
  def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
             (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
             []>, EVEX_4V, EVEX_KZ, Sched<[sched]>, NotMemoryFoldable;
  let mayLoad = 1 in {
  def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
             (ins _.RC:$src1, _.MemOp:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
             []>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
             (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
             []>, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
             (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
             []>, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
  }
  }
}
multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in {
  def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
              (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
              !strconcat(OpcodeStr,
              "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
              "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
              EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
              Sched<[sched.Folded, sched.ReadAfterFold]>;

  def rmbkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
              (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
              !strconcat(OpcodeStr,
              "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}} {z}|",
              "$dst {${mask}} {z}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
              EVEX_4V, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
              Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;

  def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
              (ins _.RC:$src1, _.ScalarMemOp:$src2),
              !strconcat(OpcodeStr,
              "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
              "$dst, $src1, ${src2}", _.BroadcastStr, "}"), []>,
              EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass blendmask_dq<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
                        AVX512VLVectorVTInfo VTInfo> {
  defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
           WriteFVarBlendask_rmb<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
           EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
                WriteFVarBlendask_rmb<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
                WriteFVarBlendask_rmb<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
                EVEX_V128;
  }
}

multiclass blendmask_bw<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
                        AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasBWI] in
    defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
             EVEX_V512;

  let Predicates = [HasBWI, HasVLX] in {
    defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
                EVEX_V128;
  }
}

defm VBLENDMPS : blendmask_dq<0x65, "vblendmps", SchedWriteFVarBlend,
                              avx512vl_f32_info>;
defm VBLENDMPD : blendmask_dq<0x65, "vblendmpd", SchedWriteFVarBlend,
                              avx512vl_f64_info>, VEX_W;
defm VPBLENDMD : blendmask_dq<0x64, "vpblendmd", SchedWriteVarBlend,
                              avx512vl_i32_info>;
defm VPBLENDMQ : blendmask_dq<0x64, "vpblendmq", SchedWriteVarBlend,
                              avx512vl_i64_info>, VEX_W;
defm VPBLENDMB : blendmask_bw<0x66, "vpblendmb", SchedWriteVarBlend,
                              avx512vl_i8_info>;
defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", SchedWriteVarBlend,
                              avx512vl_i16_info>, VEX_W;
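// Note that the blend instructions above carry empty pattern lists: an
// unmasked VBLENDM* merely copies $src2, so these instructions are only
// interesting in their masked forms, which are matched separately.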
//===----------------------------------------------------------------------===//
// Compare Instructions
//===----------------------------------------------------------------------===//

// avx512_cmp_scalar - AVX512 CMPSS and CMPSD

multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
                             PatFrag OpNode_su, PatFrag OpNodeSAE_su,
                             X86FoldableSchedWrite sched> {
  defm rr_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                (outs _.KRC:$dst),
                (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                "vcmp"#_.Suffix,
                "$cc, $src2, $src1", "$src1, $src2, $cc",
                (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
                (OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                           timm:$cc)>, EVEX_4V, VEX_LIG, Sched<[sched]>;
  let mayLoad = 1 in
  defm rm_Int : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
                (outs _.KRC:$dst),
                (ins _.RC:$src1, _.IntScalarMemOp:$src2, u8imm:$cc),
                "vcmp"#_.Suffix,
                "$cc, $src2, $src1", "$src1, $src2, $cc",
                (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
                        timm:$cc),
                (OpNode_su (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
                           timm:$cc)>, EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
                Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm rrb_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                 (outs _.KRC:$dst),
                 (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                 "vcmp"#_.Suffix,
                 "$cc, {sae}, $src2, $src1", "$src1, $src2, {sae}, $cc",
                 (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                            timm:$cc),
                 (OpNodeSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                               timm:$cc)>,
                 EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;

  let isCodeGenOnly = 1 in {
    let isCommutable = 1 in
    def rr : AVX512Ii8<0xC2, MRMSrcReg,
                       (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, u8imm:$cc),
                       !strconcat("vcmp", _.Suffix,
                                  "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
                       [(set _.KRC:$dst, (OpNode _.FRC:$src1,
                                                 _.FRC:$src2,
                                                 timm:$cc))]>,
                       EVEX_4V, VEX_LIG, Sched<[sched]>;
    def rm : AVX512Ii8<0xC2, MRMSrcMem,
                       (outs _.KRC:$dst),
                       (ins _.FRC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
                       !strconcat("vcmp", _.Suffix,
                                  "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
                       [(set _.KRC:$dst, (OpNode _.FRC:$src1,
                                                 (_.ScalarLdFrag addr:$src2),
                                                 timm:$cc))]>,
                       EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
                       Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

def X86cmpms_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                          (X86cmpms node:$src1, node:$src2, node:$cc), [{
  return N->hasOneUse();
}]>;
def X86cmpmsSAE_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                             (X86cmpmsSAE node:$src1, node:$src2, node:$cc), [{
  return N->hasOneUse();
}]>;
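// The "_su" (single-use) fragments only match when the compare node has a
// single use; this keeps a compare that also feeds an unmasked consumer from
// being folded into a masked instruction and computed twice.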
let Predicates = [HasAVX512] in {
  let ExeDomain = SSEPackedSingle in
  defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsSAE,
                                   X86cmpms_su, X86cmpmsSAE_su,
                                   SchedWriteFCmp.Scl>, AVX512XSIi8Base;
  let ExeDomain = SSEPackedDouble in
  defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsSAE,
                                   X86cmpms_su, X86cmpmsSAE_su,
                                   SchedWriteFCmp.Scl>, AVX512XDIi8Base, VEX_W;
}

multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr,
                              X86FoldableSchedWrite sched,
                              X86VectorVTInfo _, bit IsCommutable> {
  let isCommutable = IsCommutable, hasSideEffects = 0 in
  def rr : AVX512BI<opc, MRMSrcReg,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             []>, EVEX_4V, Sched<[sched]>;
  let mayLoad = 1, hasSideEffects = 0 in
  def rm : AVX512BI<opc, MRMSrcMem,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             []>, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  let isCommutable = IsCommutable, hasSideEffects = 0 in
  def rrk : AVX512BI<opc, MRMSrcReg,
             (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
                        "$dst {${mask}}, $src1, $src2}"),
             []>, EVEX_4V, EVEX_K, Sched<[sched]>;
  let mayLoad = 1, hasSideEffects = 0 in
  def rmk : AVX512BI<opc, MRMSrcMem,
             (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
                        "$dst {${mask}}, $src1, $src2}"),
             []>, EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                  bit IsCommutable> :
           avx512_icmp_packed<opc, OpcodeStr, sched, _, IsCommutable> {
  let mayLoad = 1, hasSideEffects = 0 in {
  def rmb : AVX512BI<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
              !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
                         "|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
              []>, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmbk : AVX512BI<opc, MRMSrcMem,
               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
                                   _.ScalarMemOp:$src2),
               !strconcat(OpcodeStr,
                          "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
                          "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
               []>, EVEX_4V, EVEX_K, EVEX_B,
               Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr,
                                 X86SchedWriteWidths sched,
                                 AVX512VLVectorVTInfo VTInfo, Predicate prd,
                                 bit IsCommutable = 0> {
  let Predicates = [prd] in
  defm Z : avx512_icmp_packed<opc, OpcodeStr, sched.ZMM,
                              VTInfo.info512, IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_packed<opc, OpcodeStr, sched.YMM,
                                   VTInfo.info256, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_icmp_packed<opc, OpcodeStr, sched.XMM,
                                   VTInfo.info128, IsCommutable>, EVEX_V128;
  }
}

multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
                                     X86SchedWriteWidths sched,
                                     AVX512VLVectorVTInfo VTInfo,
                                     Predicate prd, bit IsCommutable = 0> {
  let Predicates = [prd] in
  defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.ZMM,
                                  VTInfo.info512, IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.YMM,
                                       VTInfo.info256, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.XMM,
                                       VTInfo.info128, IsCommutable>, EVEX_V128;
  }
}

// This fragment treats the compare as commutable to help match loads in both
// operands for PCMPEQ.
def X86setcc_commute : SDNode<"ISD::SETCC", SDTSetCC, [SDNPCommutative]>;
def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2),
                         (setcc node:$src1, node:$src2, SETGT)>;

// AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
// increase the pattern complexity the way an immediate would.
let AddedComplexity = 2 in {
// FIXME: Is there a better scheduler class for VPCMP?
defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb",
                  SchedWriteVecALU, avx512vl_i8_info, HasBWI, 1>,
                EVEX_CD8<8, CD8VF>, VEX_WIG;

defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw",
                  SchedWriteVecALU, avx512vl_i16_info, HasBWI, 1>,
                EVEX_CD8<16, CD8VF>, VEX_WIG;

defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd",
                  SchedWriteVecALU, avx512vl_i32_info, HasAVX512, 1>,
                EVEX_CD8<32, CD8VF>;

defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq",
                  SchedWriteVecALU, avx512vl_i64_info, HasAVX512, 1>,
                T8PD, VEX_W, EVEX_CD8<64, CD8VF>;

defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb",
                  SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
                EVEX_CD8<8, CD8VF>, VEX_WIG;

defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw",
                  SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
                EVEX_CD8<16, CD8VF>, VEX_WIG;

defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd",
                  SchedWriteVecALU, avx512vl_i32_info, HasAVX512>,
                EVEX_CD8<32, CD8VF>;

defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq",
                  SchedWriteVecALU, avx512vl_i64_info, HasAVX512>,
                T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
}
multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
                          PatFrag Frag_su, PatFrag CommFrag, PatFrag CommFrag_su,
                          X86FoldableSchedWrite sched,
                          X86VectorVTInfo _, string Name> {
  let isCommutable = 1 in
  def rri : AVX512AIi8<opc, MRMSrcReg,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
             !strconcat("vpcmp", Suffix,
                        "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
             [(set _.KRC:$dst, (_.KVT (Frag:$cc (_.VT _.RC:$src1),
                                                (_.VT _.RC:$src2),
                                                cond)))]>,
             EVEX_4V, Sched<[sched]>;
  def rmi : AVX512AIi8<opc, MRMSrcMem,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
             !strconcat("vpcmp", Suffix,
                        "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
             [(set _.KRC:$dst, (_.KVT
                                (Frag:$cc
                                 (_.VT _.RC:$src1),
                                 (_.VT (_.LdFrag addr:$src2)),
                                 cond)))]>,
             EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  let isCommutable = 1 in
  def rrik : AVX512AIi8<opc, MRMSrcReg,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
                                  u8imm:$cc),
              !strconcat("vpcmp", Suffix,
                         "\t{$cc, $src2, $src1, $dst {${mask}}|",
                         "$dst {${mask}}, $src1, $src2, $cc}"),
              [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                     (_.KVT (Frag_su:$cc (_.VT _.RC:$src1),
                                                         (_.VT _.RC:$src2),
                                                         cond))))]>,
              EVEX_4V, EVEX_K, Sched<[sched]>;
  def rmik : AVX512AIi8<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
                                  u8imm:$cc),
              !strconcat("vpcmp", Suffix,
                         "\t{$cc, $src2, $src1, $dst {${mask}}|",
                         "$dst {${mask}}, $src1, $src2, $cc}"),
              [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                     (_.KVT
                                      (Frag_su:$cc
                                       (_.VT _.RC:$src1),
                                       (_.VT (_.LdFrag addr:$src2)),
                                       cond))))]>,
              EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;

  def : Pat<(_.KVT (CommFrag:$cc (_.LdFrag addr:$src2),
                                 (_.VT _.RC:$src1), cond)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmi")
             _.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>;

  def : Pat<(and _.KRCWM:$mask,
                 (_.KVT (CommFrag_su:$cc (_.LdFrag addr:$src2),
                                         (_.VT _.RC:$src1), cond))),
            (!cast<Instruction>(Name#_.ZSuffix#"rmik")
             _.KRCWM:$mask, _.RC:$src1, addr:$src2,
             (CommFrag.OperandTransform $cc))>;
}

multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
                              PatFrag Frag_su, PatFrag CommFrag,
                              PatFrag CommFrag_su, X86FoldableSchedWrite sched,
                              X86VectorVTInfo _, string Name> :
           avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
                          sched, _, Name> {
  def rmib : AVX512AIi8<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
                                  u8imm:$cc),
              !strconcat("vpcmp", Suffix,
                         "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
                         "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
              [(set _.KRC:$dst, (_.KVT (Frag:$cc
                                        (_.VT _.RC:$src1),
                                        (_.BroadcastLdFrag addr:$src2),
                                        cond)))]>,
              EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmibk : AVX512AIi8<opc, MRMSrcMem,
               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
                                   _.ScalarMemOp:$src2, u8imm:$cc),
               !strconcat("vpcmp", Suffix,
                          "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
                          "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
               [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                      (_.KVT (Frag_su:$cc
                                              (_.VT _.RC:$src1),
                                              (_.BroadcastLdFrag addr:$src2),
                                              cond))))]>,
               EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;

  def : Pat<(_.KVT (CommFrag:$cc (_.BroadcastLdFrag addr:$src2),
                                 (_.VT _.RC:$src1), cond)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmib")
             _.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>;

  def : Pat<(and _.KRCWM:$mask,
                 (_.KVT (CommFrag_su:$cc (_.BroadcastLdFrag addr:$src2),
                                         (_.VT _.RC:$src1), cond))),
            (!cast<Instruction>(Name#_.ZSuffix#"rmibk")
             _.KRCWM:$mask, _.RC:$src1, addr:$src2,
             (CommFrag_su.OperandTransform $cc))>;
}
multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, PatFrag Frag,
                             PatFrag Frag_su, PatFrag CommFrag,
                             PatFrag CommFrag_su, X86SchedWriteWidths sched,
                             AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
                          sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
                               sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
                               sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
  }
}

multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, PatFrag Frag,
                                 PatFrag Frag_su, PatFrag CommFrag,
                                 PatFrag CommFrag_su, X86SchedWriteWidths sched,
                                 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
                              sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
                                   sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
                                   sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
  }
}

def X86pcmpm_imm : SDNodeXForm<setcc, [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  uint8_t SSECC = X86::getVPCMPImmForCond(CC);
  return getI8Imm(SSECC, SDLoc(N));
}]>;

// Swapped operand version of the above.
def X86pcmpm_imm_commute : SDNodeXForm<setcc, [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  uint8_t SSECC = X86::getVPCMPImmForCond(CC);
  SSECC = X86::getSwappedVPCMPImm(SSECC);
  return getI8Imm(SSECC, SDLoc(N));
}]>;
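// For example, for (setcc a, b, SETLT) X86pcmpm_imm produces the VPCMP
// immediate 1 (LT); the commuted form maps it to 6 (NLE) so that a compare
// with the load on the left-hand side can still be selected once the
// operands are swapped. Symmetric predicates (EQ, NE) are unchanged.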
def X86pcmpm : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                       (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return !ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm>;

def X86pcmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                          (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return N->hasOneUse() && !ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm>;

// Same as above, but commutes immediate. Use for load folding.
def X86pcmpm_commute : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                               (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return !ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm_commute>;

def X86pcmpm_commute_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                                  (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return N->hasOneUse() && !ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm_commute>;

def X86pcmpum : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                        (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm>;

def X86pcmpum_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                           (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return N->hasOneUse() && ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm>;

// Same as above, but commutes immediate. Use for load folding.
def X86pcmpum_commute : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                                (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm_commute>;

def X86pcmpum_commute_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                                   (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return N->hasOneUse() && ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm_commute>;

// FIXME: Is there a better scheduler class for VPCMP/VPCMPU?
defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86pcmpm, X86pcmpm_su,
                                X86pcmpm_commute, X86pcmpm_commute_su,
                                SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
              EVEX_CD8<8, CD8VF>;
defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86pcmpum, X86pcmpum_su,
                                 X86pcmpum_commute, X86pcmpum_commute_su,
                                 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
               EVEX_CD8<8, CD8VF>;

defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86pcmpm, X86pcmpm_su,
                                X86pcmpm_commute, X86pcmpm_commute_su,
                                SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
              VEX_W, EVEX_CD8<16, CD8VF>;
defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86pcmpum, X86pcmpum_su,
                                 X86pcmpum_commute, X86pcmpum_commute_su,
                                 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
               VEX_W, EVEX_CD8<16, CD8VF>;

defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86pcmpm, X86pcmpm_su,
                                    X86pcmpm_commute, X86pcmpm_commute_su,
                                    SchedWriteVecALU, avx512vl_i32_info,
                                    HasAVX512>, EVEX_CD8<32, CD8VF>;
defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86pcmpum, X86pcmpum_su,
                                     X86pcmpum_commute, X86pcmpum_commute_su,
                                     SchedWriteVecALU, avx512vl_i32_info,
                                     HasAVX512>, EVEX_CD8<32, CD8VF>;

defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86pcmpm, X86pcmpm_su,
                                    X86pcmpm_commute, X86pcmpm_commute_su,
                                    SchedWriteVecALU, avx512vl_i64_info,
                                    HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86pcmpum, X86pcmpum_su,
                                     X86pcmpum_commute, X86pcmpum_commute_su,
                                     SchedWriteVecALU, avx512vl_i64_info,
                                     HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;

def X86cmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                         (X86cmpm node:$src1, node:$src2, node:$cc), [{
  return N->hasOneUse();
}]>;
def X86cmpmSAE_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                            (X86cmpmSAE node:$src1, node:$src2, node:$cc), [{
  return N->hasOneUse();
}]>;

def X86cmpm_imm_commute : SDNodeXForm<timm, [{
  uint8_t Imm = X86::getSwappedVCMPImm(N->getZExtValue() & 0x1f);
  return getI8Imm(Imm, SDLoc(N));
}]>;
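// X86cmpm_imm_commute performs the analogous swap for the FP predicates:
// within the 5-bit AVX-512 predicate space (hence the 0x1f mask),
// getSwappedVCMPImm exchanges LT/GT-style predicates while symmetric ones
// such as EQ, NEQ, ORD and UNORD map to themselves.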
multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
                              string Name> {
  defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
             "vcmp"#_.Suffix,
             "$cc, $src2, $src1", "$src1, $src2, $cc",
             (X86cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
             (X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
             1>, Sched<[sched]>;

  defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
             "vcmp"#_.Suffix,
             "$cc, $src2, $src1", "$src1, $src2, $cc",
             (X86cmpm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
                      timm:$cc),
             (X86cmpm_su (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
                         timm:$cc)>,
             Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
              (outs _.KRC:$dst),
              (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
              "vcmp"#_.Suffix,
              "$cc, ${src2}"#_.BroadcastStr#", $src1",
              "$src1, ${src2}"#_.BroadcastStr#", $cc",
              (X86cmpm (_.VT _.RC:$src1),
                       (_.VT (_.BroadcastLdFrag addr:$src2)),
                       timm:$cc),
              (X86cmpm_su (_.VT _.RC:$src1),
                          (_.VT (_.BroadcastLdFrag addr:$src2)),
                          timm:$cc)>,
              EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;

  // Patterns for selecting with loads in the other operand.
  def : Pat<(X86cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1),
                     timm:$cc),
            (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
             (X86cmpm_imm_commute timm:$cc))>;

  def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.LdFrag addr:$src2),
                                            (_.VT _.RC:$src1),
                                            timm:$cc)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
             _.RC:$src1, addr:$src2,
             (X86cmpm_imm_commute timm:$cc))>;

  def : Pat<(X86cmpm (_.BroadcastLdFrag addr:$src2),
                     (_.VT _.RC:$src1), timm:$cc),
            (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
             (X86cmpm_imm_commute timm:$cc))>;

  def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.BroadcastLdFrag addr:$src2),
                                            (_.VT _.RC:$src1),
                                            timm:$cc)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
             _.RC:$src1, addr:$src2,
             (X86cmpm_imm_commute timm:$cc))>;
}

multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  // comparison code form (VCMP[EQ/LT/LE/...])
  defm rrib : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
              (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
              "vcmp"#_.Suffix,
              "$cc, {sae}, $src2, $src1",
              "$src1, $src2, {sae}, $cc",
              (X86cmpmSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
              (X86cmpmSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                             timm:$cc)>,
              EVEX_B, Sched<[sched]>;
}

multiclass avx512_vcmp<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcmp_common<sched.ZMM, _.info512, NAME>,
             avx512_vcmp_sae<sched.ZMM, _.info512>, EVEX_V512;
  }
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_vcmp_common<sched.XMM, _.info128, NAME>, EVEX_V128;
    defm Z256 : avx512_vcmp_common<sched.YMM, _.info256, NAME>, EVEX_V256;
  }
}

defm VCMPPD : avx512_vcmp<SchedWriteFCmp, avx512vl_f64_info>,
              AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VCMPPS : avx512_vcmp<SchedWriteFCmp, avx512vl_f32_info>,
              AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
// Patterns to select fp compares with load as first operand.
let Predicates = [HasAVX512] in {
  def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1,
                            timm:$cc)),
            (VCMPSDZrm FR64X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;

  def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1,
                            timm:$cc)),
            (VCMPSSZrm FR32X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
}

// ----------------------------------------------------------------
// FPClass

def X86Vfpclasss_su : PatFrag<(ops node:$src1, node:$src2),
                              (X86Vfpclasss node:$src1, node:$src2), [{
  return N->hasOneUse();
}]>;

def X86Vfpclass_su : PatFrag<(ops node:$src1, node:$src2),
                             (X86Vfpclass node:$src1, node:$src2), [{
  return N->hasOneUse();
}]>;

// Handle the fpclass instruction:  mask = op(reg_scalar, imm)
//                                  mask = op(mem_scalar, imm)
multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                 Predicate prd> {
  let Predicates = [prd], ExeDomain = _.ExeDomain in {
    def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                    (ins _.RC:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.KRC:$dst,(X86Vfpclasss (_.VT _.RC:$src1),
                                                   (i32 timm:$src2)))]>,
                    Sched<[sched]>;
    def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                     (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
                     OpcodeStr##_.Suffix#
                       "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                     [(set _.KRC:$dst,(and _.KRCWM:$mask,
                                           (X86Vfpclasss_su (_.VT _.RC:$src1),
                                                            (i32 timm:$src2))))]>,
                     EVEX_K, Sched<[sched]>;
    def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.IntScalarMemOp:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix##
                      "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.KRC:$dst,
                          (X86Vfpclasss _.ScalarIntMemCPat:$src1,
                                        (i32 timm:$src2)))]>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
    def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                     (ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2),
                     OpcodeStr##_.Suffix##
                       "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                     [(set _.KRC:$dst,(and _.KRCWM:$mask,
                                           (X86Vfpclasss_su _.ScalarIntMemCPat:$src1,
                                                            (i32 timm:$src2))))]>,
                     EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
// Handle the fpclass instruction: mask = fpclass(reg_vec, imm)
//                                 mask = fpclass(mem_vec, imm)
//                                 mask = fpclass(broadcast(eltVt), imm)
multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                 string mem> {
  let ExeDomain = _.ExeDomain in {
    def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                    (ins _.RC:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.KRC:$dst,(X86Vfpclass (_.VT _.RC:$src1),
                                                  (i32 timm:$src2)))]>,
                    Sched<[sched]>;
    def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                     (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
                     OpcodeStr##_.Suffix#
                     "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                     [(set _.KRC:$dst,(and _.KRCWM:$mask,
                                           (X86Vfpclass_su (_.VT _.RC:$src1),
                                                           (i32 timm:$src2))))]>,
                     EVEX_K, Sched<[sched]>;
    def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.MemOp:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix#"{"#mem#"}"#
                    "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.KRC:$dst,(X86Vfpclass
                                      (_.VT (_.LdFrag addr:$src1)),
                                      (i32 timm:$src2)))]>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
    def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                     (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
                     OpcodeStr##_.Suffix#"{"#mem#"}"#
                     "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                     [(set _.KRC:$dst, (and _.KRCWM:$mask, (X86Vfpclass_su
                                        (_.VT (_.LdFrag addr:$src1)),
                                        (i32 timm:$src2))))]>,
                     EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
    def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                     (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
                     OpcodeStr##_.Suffix##"\t{$src2, ${src1}"##
                     _.BroadcastStr##", $dst|$dst, ${src1}"
                     ##_.BroadcastStr##", $src2}",
                     [(set _.KRC:$dst,(X86Vfpclass
                                       (_.VT (_.BroadcastLdFrag addr:$src1)),
                                       (i32 timm:$src2)))]>,
                     EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
    def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                      (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix##"\t{$src2, ${src1}"##
                      _.BroadcastStr##", $dst {${mask}}|$dst {${mask}}, ${src1}"##
                      _.BroadcastStr##", $src2}",
                      [(set _.KRC:$dst,(and _.KRCWM:$mask, (X86Vfpclass_su
                                        (_.VT (_.BroadcastLdFrag addr:$src1)),
                                        (i32 timm:$src2))))]>,
                      EVEX_B, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }

  // Allow register and broadcast forms with the x/y/z suffix we use to
  // disambiguate the memory form.
  def : InstAlias<OpcodeStr#_.Suffix#mem#
                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  (!cast<Instruction>(NAME#"rr")
                   _.KRC:$dst, _.RC:$src1, i32u8imm:$src2), 0, "att">;
  def : InstAlias<OpcodeStr#_.Suffix#mem#
                  "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                  (!cast<Instruction>(NAME#"rrk")
                   _.KRC:$dst, _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2), 0, "att">;
  def : InstAlias<OpcodeStr#_.Suffix#mem#
                  "\t{$src2, ${src1}"#_.BroadcastStr#", $dst|$dst, ${src1}"#
                  _.BroadcastStr#", $src2}",
                  (!cast<Instruction>(NAME#"rmb")
                   _.KRC:$dst, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
  def : InstAlias<OpcodeStr#_.Suffix#mem#
                  "\t{$src2, ${src1}"#_.BroadcastStr#", $dst {${mask}}|"
                  "$dst {${mask}}, ${src1}"#_.BroadcastStr#", $src2}",
                  (!cast<Instruction>(NAME#"rmbk")
                   _.KRC:$dst, _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
}

multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _,
                                     bits<8> opc, X86SchedWriteWidths sched,
                                     Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_vector_fpclass<opc, OpcodeStr, sched.ZMM,
                                   _.info512, "z">, EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, sched.XMM,
                                      _.info128, "x">, EVEX_V128;
    defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, sched.YMM,
                                      _.info256, "y">, EVEX_V256;
  }
}
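// The "x"/"y"/"z" strings passed above become the width suffix on the memory
// form, where the operands alone do not imply a vector length; e.g. the
// assembler needs "vfpclasspsz $2, (%rax), %k0" for the 512-bit form. (An
// illustrative example; the register and broadcast forms get InstAliases
// accepting the same suffixed mnemonics for convenience.)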
multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
                                 bits<8> opcScalar, X86SchedWriteWidths sched,
                                 Predicate prd> {
  defm PS : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f32_info, opcVec,
                                      sched, prd>,
                                      EVEX_CD8<32, CD8VF>;
  defm PD : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f64_info, opcVec,
                                      sched, prd>,
                                      EVEX_CD8<64, CD8VF>, VEX_W;
  defm SSZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
                                   sched.Scl, f32x_info, prd>, VEX_LIG,
                                   EVEX_CD8<32, CD8VT1>;
  defm SDZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
                                   sched.Scl, f64x_info, prd>, VEX_LIG,
                                   EVEX_CD8<64, CD8VT1>, VEX_W;
}

defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, SchedWriteFCmp,
                                      HasDQI>, AVX512AIi8Base, EVEX;

//-----------------------------------------------------------------
// Mask register copy, including
// - copy between mask registers
// - load/store mask registers
// - copy from GPR to mask register and vice versa
//
multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
                           string OpcodeStr, RegisterClass KRC,
                           ValueType vvt, X86MemOperand x86memop> {
  let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove] in
  def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
             Sched<[WriteMove]>;
  def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
             [(set KRC:$dst, (vvt (load addr:$src)))]>,
             Sched<[WriteLoad]>;
  def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
             [(store KRC:$src, addr:$dst)]>,
             Sched<[WriteStore]>;
}

multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
                               string OpcodeStr,
                               RegisterClass KRC, RegisterClass GRC> {
  let hasSideEffects = 0 in {
    def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
               Sched<[WriteMove]>;
    def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
               Sched<[WriteMove]>;
  }
}

let Predicates = [HasDQI] in
  defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
               avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
               VEX, PD;

let Predicates = [HasAVX512] in
  defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
               avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
               VEX, PS;

let Predicates = [HasBWI] in {
  defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1, i32mem>,
               VEX, PD, VEX_W;
  defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
               VEX, XD;
  defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>,
               VEX, PS, VEX_W;
  defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
               VEX, XD, VEX_W;
}
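// Feature-gating recap: kmovw is available with base AVX512F, kmovb needs
// DQI, and kmovd/kmovq need BWI. On an AVX512F-only (KNL-class) target a
// v8i1 value therefore round-trips through the 16-bit form, e.g.
//   kmovw %k1, %eax
//   kmovw %eax, %k2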
// GR from/to mask register
def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
          (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>;
def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>;

def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
          (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>;
def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit)>;

def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
          (KMOVWrk VK16:$src)>;
def : Pat<(i64 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
          (SUBREG_TO_REG (i64 0), (KMOVWrk VK16:$src), sub_32bit)>;
def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
          (COPY_TO_REGCLASS VK16:$src, GR32)>;
def : Pat<(i64 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
          (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK16:$src, GR32), sub_32bit)>;

def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
          (KMOVBrk VK8:$src)>, Requires<[HasDQI]>;
def : Pat<(i64 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
          (SUBREG_TO_REG (i64 0), (KMOVBrk VK8:$src), sub_32bit)>, Requires<[HasDQI]>;
def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
          (COPY_TO_REGCLASS VK8:$src, GR32)>;
def : Pat<(i64 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
          (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK8:$src, GR32), sub_32bit)>;

def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
          (COPY_TO_REGCLASS GR32:$src, VK32)>;
def : Pat<(i32 (bitconvert (v32i1 VK32:$src))),
          (COPY_TO_REGCLASS VK32:$src, GR32)>;
def : Pat<(v64i1 (bitconvert (i64 GR64:$src))),
          (COPY_TO_REGCLASS GR64:$src, VK64)>;
def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
          (COPY_TO_REGCLASS VK64:$src, GR64)>;

// Load/store kreg
let Predicates = [HasDQI] in {
  def : Pat<(store VK1:$src, addr:$dst),
            (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK8))>;

  def : Pat<(v1i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>;
  def : Pat<(v2i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>;
  def : Pat<(v4i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>;
}

let Predicates = [HasAVX512] in {
  def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
            (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
  def : Pat<(v16i1 (bitconvert (loadi16 addr:$src))),
            (KMOVWkm addr:$src)>;
}

def X86kextract : SDNode<"ISD::EXTRACT_VECTOR_ELT",
                         SDTypeProfile<1, 2, [SDTCisVT<0, i8>,
                                              SDTCVecEltisVT<1, i1>,
                                              SDTCisPtrTy<2>]>>;

let Predicates = [HasAVX512] in {
  multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC, ValueType maskVT> {
    def : Pat<(maskVT (scalar_to_vector GR32:$src)),
              (COPY_TO_REGCLASS GR32:$src, maskRC)>;

    def : Pat<(maskVT (scalar_to_vector GR8:$src)),
              (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>;

    def : Pat<(i8 (X86kextract maskRC:$src, (iPTR 0))),
              (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS maskRC:$src, GR32)), sub_8bit)>;

    def : Pat<(i32 (anyext (i8 (X86kextract maskRC:$src, (iPTR 0))))),
              (i32 (COPY_TO_REGCLASS maskRC:$src, GR32))>;
  }

  defm : operation_gpr_mask_copy_lowering<VK1,  v1i1>;
  defm : operation_gpr_mask_copy_lowering<VK2,  v2i1>;
  defm : operation_gpr_mask_copy_lowering<VK4,  v4i1>;
  defm : operation_gpr_mask_copy_lowering<VK8,  v8i1>;
  defm : operation_gpr_mask_copy_lowering<VK16, v16i1>;
  defm : operation_gpr_mask_copy_lowering<VK32, v32i1>;
  defm : operation_gpr_mask_copy_lowering<VK64, v64i1>;

  def : Pat<(insert_subvector (v16i1 immAllZerosV),
                              (v1i1 (scalar_to_vector GR8:$src)), (iPTR 0)),
            (COPY_TO_REGCLASS
             (KMOVWkr (AND32ri8
                       (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
                       (i32 1))), VK16)>;
}
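// Note the asymmetry in the patterns above: a zero-extended mask-to-GPR
// bitconvert must use kmovw/kmovb, which zero the upper GPR bits, while an
// any-extend can be a plain cross-class copy because the upper bits are
// undefined anyway. E.g. (i32 (zext (bitconvert (v16i1 %k)))) becomes
//   kmovw %k0, %eax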
// Mask unary operation
// - KNOT
multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
                            RegisterClass KRC, SDPatternOperator OpNode,
                            X86FoldableSchedWrite sched, Predicate prd> {
  let Predicates = [prd] in
  def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
             [(set KRC:$dst, (OpNode KRC:$src))]>,
             Sched<[sched]>;
}

multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
                                SDPatternOperator OpNode,
                                X86FoldableSchedWrite sched> {
  defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
                            sched, HasDQI>, VEX, PD;
  defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
                            sched, HasAVX512>, VEX, PS;
  defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
                            sched, HasBWI>, VEX, PD, VEX_W;
  defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
                            sched, HasBWI>, VEX, PS, VEX_W;
}

// TODO - do we need a X86SchedWriteWidths::KMASK type?
defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SchedWriteVecLogic.XMM>;

// KNL does not support KMOVB, so the 8-bit mask is promoted to 16 bits.
let Predicates = [HasAVX512, NoDQI] in
def : Pat<(vnot VK8:$src),
          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;

def : Pat<(vnot VK4:$src),
          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>;
def : Pat<(vnot VK2:$src),
          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>;

// Mask binary operation
// - KAND, KANDN, KOR, KXNOR, KXOR
multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
                             RegisterClass KRC, SDPatternOperator OpNode,
                             X86FoldableSchedWrite sched, Predicate prd,
                             bit IsCommutable> {
  let Predicates = [prd], isCommutable = IsCommutable in
  def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
             !strconcat(OpcodeStr,
                        "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>,
             Sched<[sched]>;
}

multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
                                 SDPatternOperator OpNode,
                                 X86FoldableSchedWrite sched, bit IsCommutable,
                                 Predicate prdW = HasAVX512> {
  defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
                             sched, HasDQI, IsCommutable>, VEX_4V, VEX_L, PD;
  defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
                             sched, prdW, IsCommutable>, VEX_4V, VEX_L, PS;
  defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
                             sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PD;
  defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
                             sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PS;
}

def andn : PatFrag<(ops node:$i0, node:$i1), (and (not node:$i0), node:$i1)>;
def xnor : PatFrag<(ops node:$i0, node:$i1), (not (xor node:$i0, node:$i1))>;
// These nodes use 'vnot' instead of 'not' to support vectors.
def vandn : PatFrag<(ops node:$i0, node:$i1), (and (vnot node:$i0), node:$i1)>;
def vxnor : PatFrag<(ops node:$i0, node:$i1), (vnot (xor node:$i0, node:$i1))>;
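// For example, (and (vnot k1), k2) matches the 'vandn' fragment and selects
// KANDN via the definitions below, printing in AT&T syntax as
//   kandnw %k2, %k1, %k0    ; k0 = ~k1 & k2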
// TODO - do we need a X86SchedWriteWidths::KMASK type?
defm KAND  : avx512_mask_binop_all<0x41, "kand",  and,     SchedWriteVecLogic.XMM, 1>;
defm KOR   : avx512_mask_binop_all<0x45, "kor",   or,      SchedWriteVecLogic.XMM, 1>;
defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor,   SchedWriteVecLogic.XMM, 1>;
defm KXOR  : avx512_mask_binop_all<0x47, "kxor",  xor,     SchedWriteVecLogic.XMM, 1>;
defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn,   SchedWriteVecLogic.XMM, 0>;
defm KADD  : avx512_mask_binop_all<0x4A, "kadd",  X86kadd, SchedWriteVecLogic.XMM, 1, HasDQI>;

multiclass avx512_binop_pat<SDPatternOperator VOpNode, SDPatternOperator OpNode,
                            Instruction Inst> {
  // With AVX512F the 8-bit mask is promoted to a 16-bit mask; with the DQI
  // extension the type is legal and the KxxxB instructions are used directly.
  let Predicates = [NoDQI] in
  def : Pat<(VOpNode VK8:$src1, VK8:$src2),
            (COPY_TO_REGCLASS
             (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
                   (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;

  // All types smaller than 8 bits require conversion anyway.
  def : Pat<(OpNode VK1:$src1, VK1:$src2),
            (COPY_TO_REGCLASS (Inst
                               (COPY_TO_REGCLASS VK1:$src1, VK16),
                               (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
  def : Pat<(VOpNode VK2:$src1, VK2:$src2),
            (COPY_TO_REGCLASS (Inst
                               (COPY_TO_REGCLASS VK2:$src1, VK16),
                               (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>;
  def : Pat<(VOpNode VK4:$src1, VK4:$src2),
            (COPY_TO_REGCLASS (Inst
                               (COPY_TO_REGCLASS VK4:$src1, VK16),
                               (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>;
}

defm : avx512_binop_pat<and,   and,  KANDWrr>;
defm : avx512_binop_pat<vandn, andn, KANDNWrr>;
defm : avx512_binop_pat<or,    or,   KORWrr>;
defm : avx512_binop_pat<vxnor, xnor, KXNORWrr>;
defm : avx512_binop_pat<xor,   xor,  KXORWrr>;

// Mask unpacking
multiclass avx512_mask_unpck<string Suffix, X86KVectorVTInfo Dst,
                             X86KVectorVTInfo Src, X86FoldableSchedWrite sched,
                             Predicate prd> {
  let Predicates = [prd] in {
    let hasSideEffects = 0 in
    def rr : I<0x4b, MRMSrcReg, (outs Dst.KRC:$dst),
               (ins Src.KRC:$src1, Src.KRC:$src2),
               "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
               VEX_4V, VEX_L, Sched<[sched]>;

    def : Pat<(Dst.KVT (concat_vectors Src.KRC:$src1, Src.KRC:$src2)),
              (!cast<Instruction>(NAME##rr) Src.KRC:$src2, Src.KRC:$src1)>;
  }
}

defm KUNPCKBW : avx512_mask_unpck<"bw", v16i1_info, v8i1_info,  WriteShuffle, HasAVX512>, PD;
defm KUNPCKWD : avx512_mask_unpck<"wd", v32i1_info, v16i1_info, WriteShuffle, HasBWI>, PS;
defm KUNPCKDQ : avx512_mask_unpck<"dq", v64i1_info, v32i1_info, WriteShuffle, HasBWI>, PS, VEX_W;
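// Note the operand swap in the kunpck pattern above: concat_vectors places
// $src1 in the low lanes, while KUNPCK defines DST[lo] = SRC2 and
// DST[hi] = SRC1, so (concat_vectors a, b) is emitted with b as SRC1 and a as
// SRC2. E.g. kunpckbw combines two 8-bit masks into one 16-bit mask.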
// Mask bit testing
multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                              SDNode OpNode, X86FoldableSchedWrite sched,
                              Predicate prd> {
  let Predicates = [prd], Defs = [EFLAGS] in
  def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
             [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>,
             Sched<[sched]>;
}

multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86FoldableSchedWrite sched,
                                Predicate prdW = HasAVX512> {
  defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, sched, HasDQI>,
                              VEX, PD;
  defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, sched, prdW>,
                              VEX, PS;
  defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, sched, HasBWI>,
                              VEX, PS, VEX_W;
  defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, sched, HasBWI>,
                              VEX, PD, VEX_W;
}

// TODO - do we need a X86SchedWriteWidths::KMASK type?
defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SchedWriteVecLogic.XMM>;
defm KTEST   : avx512_mask_testop_w<0x99, "ktest",   X86ktest,   SchedWriteVecLogic.XMM, HasDQI>;
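// KORTEST ORs the two mask operands and only sets EFLAGS: ZF when the OR is
// all zeros, CF when it is all ones. Testing a mask against itself is the
// usual all-false check, e.g.
//   kortestw %k0, %k0
//   je .Lall_lanes_false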
// Mask shift
multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                               SDNode OpNode, X86FoldableSchedWrite sched> {
  let Predicates = [HasAVX512] in
  def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
               !strconcat(OpcodeStr,
                          "\t{$imm, $src, $dst|$dst, $src, $imm}"),
               [(set KRC:$dst, (OpNode KRC:$src, (i8 timm:$imm)))]>,
               Sched<[sched]>;
}

multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
                                 SDNode OpNode, X86FoldableSchedWrite sched> {
  defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode,
                               sched>, VEX, TAPD, VEX_W;
  let Predicates = [HasDQI] in
  defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode,
                               sched>, VEX, TAPD;
  let Predicates = [HasBWI] in {
    defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode,
                                 sched>, VEX, TAPD, VEX_W;
    defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode,
                                 sched>, VEX, TAPD;
  }
}

defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, WriteShuffle>;
defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, WriteShuffle>;

// Patterns for comparing 128/256-bit integer vectors using a 512-bit
// instruction.
multiclass avx512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
                                                 string InstStr,
                                                 X86VectorVTInfo Narrow,
                                                 X86VectorVTInfo Wide> {
def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
                                (Narrow.VT Narrow.RC:$src2), cond)),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr#"Zrri")
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
            (Frag.OperandTransform $cc)), Narrow.KRC)>;

def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                           (Narrow.KVT (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
                                                    (Narrow.VT Narrow.RC:$src2),
                                                    cond)))),
          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
                             (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
                             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
                             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
                             (Frag_su.OperandTransform $cc)), Narrow.KRC)>;
}

multiclass avx512_icmp_packed_cc_rmb_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
                                                     PatFrag CommFrag, PatFrag CommFrag_su,
                                                     string InstStr,
                                                     X86VectorVTInfo Narrow,
                                                     X86VectorVTInfo Wide> {
// Broadcast load.
def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
                                (Narrow.BroadcastLdFrag addr:$src2), cond)),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr#"Zrmib")
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            addr:$src2, (Frag.OperandTransform $cc)), Narrow.KRC)>;

def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                           (Narrow.KVT
                            (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
                                         (Narrow.BroadcastLdFrag addr:$src2),
                                         cond)))),
          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
                             (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
                             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
                             addr:$src2, (Frag_su.OperandTransform $cc)), Narrow.KRC)>;

// Commuted with broadcast load.
def : Pat<(Narrow.KVT (CommFrag:$cc (Narrow.BroadcastLdFrag addr:$src2),
                                    (Narrow.VT Narrow.RC:$src1),
                                    cond)),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr#"Zrmib")
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            addr:$src2, (CommFrag.OperandTransform $cc)), Narrow.KRC)>;

def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                           (Narrow.KVT
                            (CommFrag_su:$cc (Narrow.BroadcastLdFrag addr:$src2),
                                             (Narrow.VT Narrow.RC:$src1),
                                             cond)))),
          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
                             (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
                             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
                             addr:$src2, (CommFrag_su.OperandTransform $cc)), Narrow.KRC)>;
}
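// A sketch of the widening, assuming AVX512F without VLX: a v8i32 signed
// greater-than compare
//   (v8i1 (setcc (v8i32 %a), (v8i32 %b), SETGT))
// is selected roughly as
//   INSERT_SUBREG of %a and %b into undef v16i32 (sub_ymm)
//   VPCMPDZrri %za, %zb, 6        ; predicate 6 = nle, i.e. signed gt
//   COPY_TO_REGCLASS of the v16i1 result to VK8
// The upper compare lanes are garbage but lie outside the narrow mask.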
// Same as above, but for fp types which don't use PatFrags.
multiclass avx512_cmp_packed_cc_no_vlx_lowering<string InstStr,
                                                X86VectorVTInfo Narrow,
                                                X86VectorVTInfo Wide> {
def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
                               (Narrow.VT Narrow.RC:$src2), timm:$cc)),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr#"Zrri")
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
            timm:$cc), Narrow.KRC)>;

def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                           (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
                                       (Narrow.VT Narrow.RC:$src2), timm:$cc))),
          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
                             (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
                             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
                             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
                             timm:$cc), Narrow.KRC)>;

// Broadcast load.
def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
                               (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc)),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr#"Zrmbi")
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            addr:$src2, timm:$cc), Narrow.KRC)>;

def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                           (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
                                       (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc))),
          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
                             (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
                             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
                             addr:$src2, timm:$cc), Narrow.KRC)>;

// Commuted with broadcast load.
def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
                               (Narrow.VT Narrow.RC:$src1), timm:$cc)),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr#"Zrmbi")
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;

def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                           (X86cmpm_su (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
                                       (Narrow.VT Narrow.RC:$src1), timm:$cc))),
          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
                             (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
                             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
                             addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
}

let Predicates = [HasAVX512, NoVLX] in {
  defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
  defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;

  defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>;
  defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>;

  defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>;
  defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>;

  defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>;
  defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;

  defm : avx512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPD", v8i32x_info, v16i32_info>;
  defm : avx512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUD", v8i32x_info, v16i32_info>;

  defm : avx512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPD", v4i32x_info, v16i32_info>;
  defm : avx512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUD", v4i32x_info, v16i32_info>;

  defm : avx512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPQ", v4i64x_info, v8i64_info>;
  defm : avx512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUQ", v4i64x_info, v8i64_info>;

  defm : avx512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPQ", v2i64x_info, v8i64_info>;
  defm : avx512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUQ", v2i64x_info, v8i64_info>;

  defm : avx512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v8f32x_info, v16f32_info>;
  defm : avx512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v4f32x_info, v16f32_info>;
  defm : avx512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v4f64x_info, v8f64_info>;
  defm : avx512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v2f64x_info, v8f64_info>;
}

let Predicates = [HasBWI, NoVLX] in {
  defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v32i8x_info, v64i8_info>;
  defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v32i8x_info, v64i8_info>;

  defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v16i8x_info, v64i8_info>;
  defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v16i8x_info, v64i8_info>;

  defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v16i16x_info, v32i16_info>;
  defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v16i16x_info, v32i16_info>;

  defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v8i16x_info, v32i16_info>;
  defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v8i16x_info, v32i16_info>;
}

// Mask setting all 0s or 1s
multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> {
  let Predicates = [HasAVX512] in
  let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1,
      SchedRW = [WriteZero] in
  def #NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
                 [(set KRC:$dst, (VT Val))]>;
}

multiclass avx512_mask_setop_w<PatFrag Val> {
  defm W : avx512_mask_setop<VK16, v16i1, Val>;
  defm D : avx512_mask_setop<VK32, v32i1, Val>;
  defm Q : avx512_mask_setop<VK64, v64i1, Val>;
}

defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
defm KSET1 : avx512_mask_setop_w<immAllOnesV>;

// With AVX-512 only, the 8-bit mask is promoted to a 16-bit mask.
let Predicates = [HasAVX512] in {
  def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
  def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>;
  def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>;
  def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>;
  def : Pat<(v8i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK8)>;
  def : Pat<(v4i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK4)>;
  def : Pat<(v2i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK2)>;
  def : Pat<(v1i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK1)>;
}
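// KSET0/KSET1 are pseudos so they stay rematerializable and as cheap as a
// move; they are expanded after register allocation (presumably alongside the
// other pseudos in expandPostRAPseudos) into the self-inverse mask idioms,
// e.g. kxorw %k0, %k0, %k0 for all-zeros and kxnorw %k0, %k0, %k0 for
// all-ones.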
// Patterns for kmask insert_subvector/extract_subvector to/from index=0
multiclass operation_subvector_mask_lowering<RegisterClass subRC, ValueType subVT,
                                             RegisterClass RC, ValueType VT> {
  def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
            (subVT (COPY_TO_REGCLASS RC:$src, subRC))>;

  def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
            (VT (COPY_TO_REGCLASS subRC:$src, RC))>;
}
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK2,  v2i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK4,  v4i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK8,  v8i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK4,  v4i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK8,  v8i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK8,  v8i1>;
defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK16, v16i1, VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>;

//===----------------------------------------------------------------------===//
// AVX-512 - Aligned and unaligned load and store
//
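// Each move below has merge- and zero-masked variants in addition to the
// plain form; sketched in assembly for an aligned packed-float load:
//   vmovaps (%rdi), %zmm0              ; unmasked (rm)
//   vmovaps (%rdi), %zmm0 {%k1}        ; merge-masking, $src0 = $dst (rmk)
//   vmovaps (%rdi), %zmm0 {%k1} {z}    ; zero-masking (rmkz)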
multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name,
                       X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload,
                       X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
                       bit NoRMPattern = 0,
                       SDPatternOperator SelectOprr = vselect> {
  let hasSideEffects = 0 in {
    let isMoveReg = 1 in
    def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),
                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
                      _.ExeDomain>, EVEX, Sched<[Sched.RR]>,
                      EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
    def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
                        (ins _.KRCWM:$mask, _.RC:$src),
                        !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
                                   "${dst} {${mask}} {z}, $src}"),
                        [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
                                                           (_.VT _.RC:$src),
                                                           _.ImmAllZerosV)))], _.ExeDomain>,
                        EVEX, EVEX_KZ, Sched<[Sched.RR]>;

    let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1 in
    def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                      !if(NoRMPattern, [],
                          [(set _.RC:$dst,
                                (_.VT (ld_frag addr:$src)))]),
                      _.ExeDomain>, EVEX, Sched<[Sched.RM]>,
                      EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;

    let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in {
      def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
                         (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1),
                         !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
                                    "${dst} {${mask}}, $src1}"),
                         [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
                                                            (_.VT _.RC:$src1),
                                                            (_.VT _.RC:$src0))))], _.ExeDomain>,
                         EVEX, EVEX_K, Sched<[Sched.RR]>;
      def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
                         (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1),
                         !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
                                    "${dst} {${mask}}, $src1}"),
                         [(set _.RC:$dst, (_.VT
                                           (vselect _.KRCWM:$mask,
                                                    (_.VT (ld_frag addr:$src1)),
                                                    (_.VT _.RC:$src0))))], _.ExeDomain>,
                         EVEX, EVEX_K, Sched<[Sched.RM]>;
    }
    def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
                        (ins _.KRCWM:$mask, _.MemOp:$src),
                        OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"#
                        "${dst} {${mask}} {z}, $src}",
                        [(set _.RC:$dst, (_.VT (vselect _.KRCWM:$mask,
                                                        (_.VT (ld_frag addr:$src)), _.ImmAllZerosV)))],
                        _.ExeDomain>, EVEX, EVEX_KZ, Sched<[Sched.RM]>;
  }
  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
            (!cast<Instruction>(Name#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;

  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;

  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))),
            (!cast<Instruction>(Name#_.ZSuffix##rmk) _.RC:$src0,
                                                     _.KRCWM:$mask, addr:$ptr)>;
}

multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
                                 AVX512VLVectorVTInfo _, Predicate prd,
                                 X86SchedWriteMoveLSWidths Sched,
                                 string EVEX2VEXOvrd, bit NoRMPattern = 0> {
  let Predicates = [prd] in
  defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512,
                       _.info512.AlignedLdFrag, masked_load_aligned,
                       Sched.ZMM, "", NoRMPattern>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256,
                            _.info256.AlignedLdFrag, masked_load_aligned,
                            Sched.YMM, EVEX2VEXOvrd#"Y", NoRMPattern>, EVEX_V256;
    defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128,
                            _.info128.AlignedLdFrag, masked_load_aligned,
                            Sched.XMM, EVEX2VEXOvrd, NoRMPattern>, EVEX_V128;
  }
}

multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
                          AVX512VLVectorVTInfo _, Predicate prd,
                          X86SchedWriteMoveLSWidths Sched,
                          string EVEX2VEXOvrd, bit NoRMPattern = 0,
                          SDPatternOperator SelectOprr = vselect> {
  let Predicates = [prd] in
  defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512, _.info512.LdFrag,
                       masked_load, Sched.ZMM, "",
                       NoRMPattern, SelectOprr>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256, _.info256.LdFrag,
                            masked_load, Sched.YMM, EVEX2VEXOvrd#"Y",
                            NoRMPattern, SelectOprr>, EVEX_V256;
    defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128, _.info128.LdFrag,
                            masked_load, Sched.XMM, EVEX2VEXOvrd,
                            NoRMPattern, SelectOprr>, EVEX_V128;
  }
}

multiclass avx512_store<bits<8> opc, string OpcodeStr, string BaseName,
                        X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore,
                        X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
                        bit NoMRPattern = 0> {
  let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
    let isMoveReg = 1 in
    def rr_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
                          OpcodeStr # "\t{$src, $dst|$dst, $src}",
                          [], _.ExeDomain>, EVEX,
                          FoldGenData<BaseName#_.ZSuffix#rr>, Sched<[Sched.RR]>,
                          EVEX2VEXOverride<EVEX2VEXOvrd#"rr_REV">;
    def rrk_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
                           (ins _.KRCWM:$mask, _.RC:$src),
                           OpcodeStr # "\t{$src, ${dst} {${mask}}|"#
                           "${dst} {${mask}}, $src}",
                           [], _.ExeDomain>, EVEX, EVEX_K,
                           FoldGenData<BaseName#_.ZSuffix#rrk>,
                           Sched<[Sched.RR]>;
    def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
                            (ins _.KRCWM:$mask, _.RC:$src),
                            OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" #
                            "${dst} {${mask}} {z}, $src}",
                            [], _.ExeDomain>, EVEX, EVEX_KZ,
                            FoldGenData<BaseName#_.ZSuffix#rrkz>,
                            Sched<[Sched.RR]>;
  }

  let hasSideEffects = 0, mayStore = 1 in
  def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    !if(NoMRPattern, [],
                        [(st_frag (_.VT _.RC:$src), addr:$dst)]),
                    _.ExeDomain>, EVEX, Sched<[Sched.MR]>,
                    EVEX2VEXOverride<EVEX2VEXOvrd#"mr">;
  def mrk : AVX512PI<opc, MRMDestMem, (outs),
                     (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
                     OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
                     [], _.ExeDomain>, EVEX, EVEX_K, Sched<[Sched.MR]>,
                     NotMemoryFoldable;

  def : Pat<(mstore (_.VT _.RC:$src), addr:$ptr, _.KRCWM:$mask),
            (!cast<Instruction>(BaseName#_.ZSuffix#mrk) addr:$ptr,
                                                        _.KRCWM:$mask, _.RC:$src)>;

  def : InstAlias<OpcodeStr#".s\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(BaseName#_.ZSuffix#"rr_REV")
                   _.RC:$dst, _.RC:$src), 0>;
  def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
                  (!cast<Instruction>(BaseName#_.ZSuffix#"rrk_REV")
                   _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
  def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}",
                  (!cast<Instruction>(BaseName#_.ZSuffix#"rrkz_REV")
                   _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
}

multiclass avx512_store_vl<bits<8> opc, string OpcodeStr,
                           AVX512VLVectorVTInfo _, Predicate prd,
                           X86SchedWriteMoveLSWidths Sched,
                           string EVEX2VEXOvrd, bit NoMRPattern = 0> {
  let Predicates = [prd] in
  defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, store,
                        masked_store, Sched.ZMM, "",
                        NoMRPattern>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, store,
                             masked_store, Sched.YMM,
                             EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
    defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, store,
                             masked_store, Sched.XMM, EVEX2VEXOvrd,
                             NoMRPattern>, EVEX_V128;
  }
}

multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _, Predicate prd,
                                  X86SchedWriteMoveLSWidths Sched,
                                  string EVEX2VEXOvrd, bit NoMRPattern = 0> {
  let Predicates = [prd] in
  defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, alignedstore,
                        masked_store_aligned, Sched.ZMM, "",
                        NoMRPattern>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, alignedstore,
                             masked_store_aligned, Sched.YMM,
                             EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
    defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, alignedstore,
                             masked_store_aligned, Sched.XMM, EVEX2VEXOvrd,
                             NoMRPattern>, EVEX_V128;
  }
}

defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info,
                                     HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
               avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info,
                                      HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
               PS, EVEX_CD8<32, CD8VF>;

defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info,
                                     HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
               avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info,
                                      HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
               PD, VEX_W, EVEX_CD8<64, CD8VF>;

defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512,
                              SchedWriteFMoveLS, "VMOVUPS", 0, null_frag>,
               avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512,
                               SchedWriteFMoveLS, "VMOVUPS">,
               PS, EVEX_CD8<32, CD8VF>;

defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512,
                              SchedWriteFMoveLS, "VMOVUPD", 0, null_frag>,
               avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512,
                               SchedWriteFMoveLS, "VMOVUPD">,
               PD, VEX_W, EVEX_CD8<64, CD8VF>;

defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info,
                                       HasAVX512, SchedWriteVecMoveLS,
                                       "VMOVDQA", 1>,
                 avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info,
                                        HasAVX512, SchedWriteVecMoveLS,
                                        "VMOVDQA", 1>,
                 PD, EVEX_CD8<32, CD8VF>;
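// vmovdqa32 above and vmovdqa64/vmovdqu8/16/32/64 below are all full-width
// moves; they differ only in write-mask granularity. E.g.
//   vmovdqa32 %zmm1, %zmm0 {%k1}   ; merges per 32-bit lane (16 mask bits)
//   vmovdqa64 %zmm1, %zmm0 {%k1}   ; merges per 64-bit lane (8 mask bits)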
"VMOVDQA">, 3598 avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info, 3599 HasAVX512, SchedWriteVecMoveLS, 3600 "VMOVDQA">, 3601 PD, VEX_W, EVEX_CD8<64, CD8VF>; 3602 3603defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI, 3604 SchedWriteVecMoveLS, "VMOVDQU", 1>, 3605 avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, HasBWI, 3606 SchedWriteVecMoveLS, "VMOVDQU", 1>, 3607 XD, EVEX_CD8<8, CD8VF>; 3608 3609defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI, 3610 SchedWriteVecMoveLS, "VMOVDQU", 1>, 3611 avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, HasBWI, 3612 SchedWriteVecMoveLS, "VMOVDQU", 1>, 3613 XD, VEX_W, EVEX_CD8<16, CD8VF>; 3614 3615defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512, 3616 SchedWriteVecMoveLS, "VMOVDQU", 1, null_frag>, 3617 avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, HasAVX512, 3618 SchedWriteVecMoveLS, "VMOVDQU", 1>, 3619 XS, EVEX_CD8<32, CD8VF>; 3620 3621defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512, 3622 SchedWriteVecMoveLS, "VMOVDQU", 0, null_frag>, 3623 avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, HasAVX512, 3624 SchedWriteVecMoveLS, "VMOVDQU">, 3625 XS, VEX_W, EVEX_CD8<64, CD8VF>; 3626 3627// Special instructions to help with spilling when we don't have VLX. We need 3628// to load or store from a ZMM register instead. These are converted in 3629// expandPostRAPseudos. 3630let isReMaterializable = 1, canFoldAsLoad = 1, 3631 isPseudo = 1, mayLoad = 1, hasSideEffects = 0 in { 3632def VMOVAPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src), 3633 "", []>, Sched<[WriteFLoadX]>; 3634def VMOVAPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src), 3635 "", []>, Sched<[WriteFLoadY]>; 3636def VMOVUPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src), 3637 "", []>, Sched<[WriteFLoadX]>; 3638def VMOVUPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src), 3639 "", []>, Sched<[WriteFLoadY]>; 3640} 3641 3642let isPseudo = 1, mayStore = 1, hasSideEffects = 0 in { 3643def VMOVAPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src), 3644 "", []>, Sched<[WriteFStoreX]>; 3645def VMOVAPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src), 3646 "", []>, Sched<[WriteFStoreY]>; 3647def VMOVUPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src), 3648 "", []>, Sched<[WriteFStoreX]>; 3649def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src), 3650 "", []>, Sched<[WriteFStoreY]>; 3651} 3652 3653def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 immAllZerosV), 3654 (v8i64 VR512:$src))), 3655 (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)), 3656 VK8), VR512:$src)>; 3657 3658def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV), 3659 (v16i32 VR512:$src))), 3660 (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>; 3661 3662// These patterns exist to prevent the above patterns from introducing a second 3663// mask inversion when one already exists. 
// These patterns exist to prevent the above patterns from introducing a second
// mask inversion when one already exists.
def : Pat<(v8i64 (vselect (xor VK8:$mask, (v8i1 immAllOnesV)),
                          (v8i64 immAllZerosV),
                          (v8i64 VR512:$src))),
          (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>;
def : Pat<(v16i32 (vselect (xor VK16:$mask, (v16i1 immAllOnesV)),
                           (v16i32 immAllZerosV),
                           (v16i32 VR512:$src))),
          (VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;

multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow,
                              X86VectorVTInfo Wide> {
  def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
                                Narrow.RC:$src1, Narrow.RC:$src0)),
            (EXTRACT_SUBREG
             (Wide.VT
              (!cast<Instruction>(InstrStr#"rrk")
               (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src0, Narrow.SubRegIdx)),
               (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
               (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
             Narrow.SubRegIdx)>;

  def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
                                Narrow.RC:$src1, Narrow.ImmAllZerosV)),
            (EXTRACT_SUBREG
             (Wide.VT
              (!cast<Instruction>(InstrStr#"rrkz")
               (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
               (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
             Narrow.SubRegIdx)>;
}

// Patterns for handling narrow mask selects of 128/256-bit vectors when VLX
// isn't available. Use a 512-bit operation and extract.
let Predicates = [HasAVX512, NoVLX] in {
  defm : mask_move_lowering<"VMOVAPSZ", v4f32x_info, v16f32_info>;
  defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>;
  defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>;
  defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>;

  defm : mask_move_lowering<"VMOVAPDZ", v2f64x_info, v8f64_info>;
  defm : mask_move_lowering<"VMOVDQA64Z", v2i64x_info, v8i64_info>;
  defm : mask_move_lowering<"VMOVAPDZ", v4f64x_info, v8f64_info>;
  defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>;
}

let Predicates = [HasBWI, NoVLX] in {
  defm : mask_move_lowering<"VMOVDQU8Z", v16i8x_info, v64i8_info>;
  defm : mask_move_lowering<"VMOVDQU8Z", v32i8x_info, v64i8_info>;

  defm : mask_move_lowering<"VMOVDQU16Z", v8i16x_info, v32i16_info>;
  defm : mask_move_lowering<"VMOVDQU16Z", v16i16x_info, v32i16_info>;
}

let Predicates = [HasAVX512] in {
  // 512-bit load.
  def : Pat<(alignedloadv16i32 addr:$src),
            (VMOVDQA64Zrm addr:$src)>;
  def : Pat<(alignedloadv32i16 addr:$src),
            (VMOVDQA64Zrm addr:$src)>;
  def : Pat<(alignedloadv64i8 addr:$src),
            (VMOVDQA64Zrm addr:$src)>;
  def : Pat<(loadv16i32 addr:$src),
            (VMOVDQU64Zrm addr:$src)>;
  def : Pat<(loadv32i16 addr:$src),
            (VMOVDQU64Zrm addr:$src)>;
  def : Pat<(loadv64i8 addr:$src),
            (VMOVDQU64Zrm addr:$src)>;

  // 512-bit store.
  def : Pat<(alignedstore (v16i32 VR512:$src), addr:$dst),
            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst),
            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst),
            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v16i32 VR512:$src), addr:$dst),
            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v32i16 VR512:$src), addr:$dst),
            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v64i8 VR512:$src), addr:$dst),
            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
}
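// Note that unmasked whole-register integer loads and stores are untyped, so
// a single flavor suffices: the patterns above and below select the
// 64-bit-element forms (VMOVDQA64/VMOVDQU64) for every integer element type.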
let Predicates = [HasVLX] in {
  // 128-bit load.
  def : Pat<(alignedloadv4i32 addr:$src),
            (VMOVDQA64Z128rm addr:$src)>;
  def : Pat<(alignedloadv8i16 addr:$src),
            (VMOVDQA64Z128rm addr:$src)>;
  def : Pat<(alignedloadv16i8 addr:$src),
            (VMOVDQA64Z128rm addr:$src)>;
  def : Pat<(loadv4i32 addr:$src),
            (VMOVDQU64Z128rm addr:$src)>;
  def : Pat<(loadv8i16 addr:$src),
            (VMOVDQU64Z128rm addr:$src)>;
  def : Pat<(loadv16i8 addr:$src),
            (VMOVDQU64Z128rm addr:$src)>;

  // 128-bit store.
  def : Pat<(alignedstore (v4i32 VR128X:$src), addr:$dst),
            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst),
            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst),
            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v4i32 VR128X:$src), addr:$dst),
            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v8i16 VR128X:$src), addr:$dst),
            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v16i8 VR128X:$src), addr:$dst),
            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;

  // 256-bit load.
  def : Pat<(alignedloadv8i32 addr:$src),
            (VMOVDQA64Z256rm addr:$src)>;
  def : Pat<(alignedloadv16i16 addr:$src),
            (VMOVDQA64Z256rm addr:$src)>;
  def : Pat<(alignedloadv32i8 addr:$src),
            (VMOVDQA64Z256rm addr:$src)>;
  def : Pat<(loadv8i32 addr:$src),
            (VMOVDQU64Z256rm addr:$src)>;
  def : Pat<(loadv16i16 addr:$src),
            (VMOVDQU64Z256rm addr:$src)>;
  def : Pat<(loadv32i8 addr:$src),
            (VMOVDQU64Z256rm addr:$src)>;
  // 256-bit store.
  def : Pat<(alignedstore (v8i32 VR256X:$src), addr:$dst),
            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst),
            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst),
            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v8i32 VR256X:$src), addr:$dst),
            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v16i16 VR256X:$src), addr:$dst),
            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v32i8 VR256X:$src), addr:$dst),
            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
}

// Move Int Doubleword to Packed Double Int
//
let ExeDomain = SSEPackedInt in {
def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
                             "vmovd\t{$src, $dst|$dst, $src}",
                             [(set VR128X:$dst,
                               (v4i32 (scalar_to_vector GR32:$src)))]>,
                             EVEX, Sched<[WriteVecMoveFromGpr]>;
def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
                             "vmovd\t{$src, $dst|$dst, $src}",
                             [(set VR128X:$dst,
                               (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
                             EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>;
def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
                              "vmovq\t{$src, $dst|$dst, $src}",
                              [(set VR128X:$dst,
                                (v2i64 (scalar_to_vector GR64:$src)))]>,
                              EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
                              (ins i64mem:$src),
                              "vmovq\t{$src, $dst|$dst, $src}", []>,
                              EVEX, VEX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecLoad]>;
let isCodeGenOnly = 1 in {
def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src),
                             "vmovq\t{$src, $dst|$dst, $src}",
                             [(set FR64X:$dst, (bitconvert GR64:$src))]>,
                             EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src),
                             "vmovq\t{$src, $dst|$dst, $src}",
                             [(set GR64:$dst, (bitconvert FR64X:$src))]>,
                             EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
}
} // ExeDomain = SSEPackedInt
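// These GPR-to-XMM moves zero the untouched upper vector elements in
// hardware, e.g. vmovd %eax, %xmm0 produces <eax, 0, 0, 0> as v4i32 (and
// vmovq %rax, %xmm0 produces <rax, 0>), which is stronger than the undef
// upper lanes that scalar_to_vector requires.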
// Move Int Doubleword to Single Scalar
//
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
def VMOVDI2SSZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
                            "vmovd\t{$src, $dst|$dst, $src}",
                            [(set FR32X:$dst, (bitconvert GR32:$src))]>,
                            EVEX, Sched<[WriteVecMoveFromGpr]>;
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1

// Move doubleword from xmm register to r/m32
//
let ExeDomain = SSEPackedInt in {
def VMOVPDI2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
                             "vmovd\t{$src, $dst|$dst, $src}",
                             [(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
                                                          (iPTR 0)))]>,
                             EVEX, Sched<[WriteVecMoveToGpr]>;
def VMOVPDI2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs),
                             (ins i32mem:$dst, VR128X:$src),
                             "vmovd\t{$src, $dst|$dst, $src}",
                             [(store (i32 (extractelt (v4i32 VR128X:$src),
                                                      (iPTR 0))), addr:$dst)]>,
                             EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>;
} // ExeDomain = SSEPackedInt

// Move quadword from xmm1 register to r/m64
//
let ExeDomain = SSEPackedInt in {
def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
                       "vmovq\t{$src, $dst|$dst, $src}",
                       [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
                                                    (iPTR 0)))]>,
                       PD, EVEX, VEX_W, Sched<[WriteVecMoveToGpr]>,
                       Requires<[HasAVX512]>;

let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src),
                       "vmovq\t{$src, $dst|$dst, $src}", []>, PD,
                       EVEX, VEX_W, Sched<[WriteVecStore]>,
                       Requires<[HasAVX512, In64BitMode]>;

def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
                      (ins i64mem:$dst, VR128X:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
                              addr:$dst)]>,
                      EVEX, PD, VEX_W, EVEX_CD8<64, CD8VT1>,
                      Sched<[WriteVecStore]>, Requires<[HasAVX512]>;

let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
                             (ins VR128X:$src),
                             "vmovq\t{$src, $dst|$dst, $src}", []>,
                             EVEX, VEX_W, Sched<[SchedWriteVecLogic.XMM]>;
} // ExeDomain = SSEPackedInt

def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}",
                (VMOVPQI2QIZrr VR128X:$dst, VR128X:$src), 0>;

let Predicates = [HasAVX512] in {
  def : Pat<(X86vextractstore64 (v2i64 VR128X:$src), addr:$dst),
            (VMOVPQI2QIZmr addr:$dst, VR128X:$src)>;
}

// Move Scalar Single to Double Int
//
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
def VMOVSS2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
                            (ins FR32X:$src),
                            "vmovd\t{$src, $dst|$dst, $src}",
                            [(set GR32:$dst, (bitconvert FR32X:$src))]>,
                            EVEX, Sched<[WriteVecMoveToGpr]>;
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1

// Move Quadword Int to Packed Quadword Int
//
let ExeDomain = SSEPackedInt in {
def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
                              (ins i64mem:$src),
                              "vmovq\t{$src, $dst|$dst, $src}",
                              [(set VR128X:$dst,
                                (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
                              EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>;
} // ExeDomain = SSEPackedInt
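// vmovq from memory (VMOVQI2PQIZrm) is likewise a zero-extending 64-bit load
// into the low qword of an xmm register, e.g. vmovq (%rdi), %xmm0 yields
// <m64, 0> as v2i64.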
// Allow "vmovd" but print "vmovq".
def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
                (VMOV64toPQIZrr VR128X:$dst, GR64:$src), 0>;
def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
                (VMOVPQIto64Zrr GR64:$dst, VR128X:$src), 0>;

//===----------------------------------------------------------------------===//
// AVX-512 MOVSS, MOVSD
//===----------------------------------------------------------------------===//

multiclass avx512_move_scalar<string asm, SDNode OpNode, PatFrag vzload_frag,
                              X86VectorVTInfo _> {
  let Predicates = [HasAVX512, OptForSize] in
  def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.RC:$src2),
                    !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2)))],
                    _.ExeDomain>, EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
  def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
                      (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
                      !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|",
                                 "$dst {${mask}} {z}, $src1, $src2}"),
                      [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
                                              (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                                              _.ImmAllZerosV)))],
                      _.ExeDomain>, EVEX_4V, EVEX_KZ, Sched<[SchedWriteFShuffle.XMM]>;
  let Constraints = "$src0 = $dst" in
  def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
                     (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
                     !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|",
                                "$dst {${mask}}, $src1, $src2}"),
                     [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
                                             (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                                             (_.VT _.RC:$src0))))],
                     _.ExeDomain>, EVEX_4V, EVEX_K, Sched<[SchedWriteFShuffle.XMM]>;
  let canFoldAsLoad = 1, isReMaterializable = 1 in {
    def rm : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst), (ins _.ScalarMemOp:$src),
                      !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
                      [(set _.RC:$dst, (_.VT (vzload_frag addr:$src)))],
                      _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
  let isCodeGenOnly = 1 in
  def rm_alt : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
               !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
               [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
               _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
  }
  let mayLoad = 1, hasSideEffects = 0 in {
  let Constraints = "$src0 = $dst" in
  def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
              (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src),
              !strconcat(asm, "\t{$src, $dst {${mask}}|",
              "$dst {${mask}}, $src}"),
              [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFLoad]>;
  def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
               (ins _.KRCWM:$mask, _.ScalarMemOp:$src),
               !strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
               "$dst {${mask}} {z}, $src}"),
               [], _.ExeDomain>, EVEX, EVEX_KZ, Sched<[WriteFLoad]>;
  }
  def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src),
             !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
             [(store _.FRC:$src, addr:$dst)], _.ExeDomain>,
             EVEX, Sched<[WriteFStore]>;
  let mayStore = 1, hasSideEffects = 0 in
  def mrk: AVX512PI<0x11, MRMDestMem, (outs),
              (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.RC:$src),
              !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
              [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFStore]>,
              NotMemoryFoldable;
}

defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, X86vzload32, f32x_info>,
                                  VEX_LIG, XS, EVEX_CD8<32, CD8VT1>;

defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, X86vzload64, f64x_info>,
                                  VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>;


multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
                                       PatLeaf ZeroFP, X86VectorVTInfo _> {

def : Pat<(_.VT (OpNode _.RC:$src0,
                        (_.VT (scalar_to_vector
                                  (_.EltVT (X86selects VK1WM:$mask,
                                                       (_.EltVT _.FRC:$src1),
                                                       (_.EltVT _.FRC:$src2))))))),
          (!cast<Instruction>(InstrStr#rrk)
                        (_.VT (COPY_TO_REGCLASS _.FRC:$src2, _.RC)),
                        VK1WM:$mask,
                        (_.VT _.RC:$src0),
                        (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;

def : Pat<(_.VT (OpNode _.RC:$src0,
                        (_.VT (scalar_to_vector
                                  (_.EltVT (X86selects VK1WM:$mask,
                                                       (_.EltVT _.FRC:$src1),
                                                       (_.EltVT ZeroFP))))))),
          (!cast<Instruction>(InstrStr#rrkz)
                        VK1WM:$mask,
                        (_.VT _.RC:$src0),
                        (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
}

multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
                                        dag Mask, RegisterClass MaskRC> {

def : Pat<(masked_store
             (_.info512.VT (insert_subvector undef,
                               (_.info128.VT _.info128.RC:$src),
                               (iPTR 0))), addr:$dst, Mask),
          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
                      _.info128.RC:$src)>;

}

multiclass avx512_store_scalar_lowering_subreg<string InstrStr,
                                               AVX512VLVectorVTInfo _,
                                               dag Mask, RegisterClass MaskRC,
                                               SubRegIndex subreg> {

def : Pat<(masked_store
             (_.info512.VT (insert_subvector undef,
                               (_.info128.VT _.info128.RC:$src),
                               (iPTR 0))), addr:$dst, Mask),
          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      _.info128.RC:$src)>;

}
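// Illustrative sketch (editorial, not from the source): the store lowerings
// above catch a one-lane masked store that type legalization has widened into
// a 512-bit masked store, roughly of the shape
//   masked.store(<16 x float> insert_subvector(undef, %v128, 0), %p, %mask)
// where only lane 0 of the mask can be set, and emit a single masked
// VMOVSS/VMOVSD store instead of a full-width masked store.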
// This matches the more recent codegen from clang that avoids emitting a
// 512-bit masked store directly. Codegen will widen 128-bit masked store to
// 512 bits on AVX512F-only targets.
multiclass avx512_store_scalar_lowering_subreg2<string InstrStr,
                                                AVX512VLVectorVTInfo _,
                                                dag Mask512, dag Mask128,
                                                RegisterClass MaskRC,
                                                SubRegIndex subreg> {

// AVX512F pattern.
def : Pat<(masked_store
             (_.info512.VT (insert_subvector undef,
                               (_.info128.VT _.info128.RC:$src),
                               (iPTR 0))), addr:$dst, Mask512),
          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      _.info128.RC:$src)>;

// AVX512VL pattern.
def : Pat<(masked_store (_.info128.VT _.info128.RC:$src), addr:$dst, Mask128),
          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      _.info128.RC:$src)>;
}

multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
                                       dag Mask, RegisterClass MaskRC> {

def : Pat<(_.info128.VT (extract_subvector
                         (_.info512.VT (masked_load addr:$srcAddr, Mask,
                                        _.info512.ImmAllZerosV)),
                         (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmkz)
                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
                      addr:$srcAddr)>;

def : Pat<(_.info128.VT (extract_subvector
                         (_.info512.VT (masked_load addr:$srcAddr, Mask,
                                        (_.info512.VT (insert_subvector undef,
                                              (_.info128.VT (X86vzmovl _.info128.RC:$src)),
                                              (iPTR 0))))),
                         (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
                      addr:$srcAddr)>;

}

multiclass avx512_load_scalar_lowering_subreg<string InstrStr,
                                              AVX512VLVectorVTInfo _,
                                              dag Mask, RegisterClass MaskRC,
                                              SubRegIndex subreg> {

def : Pat<(_.info128.VT (extract_subvector
                         (_.info512.VT (masked_load addr:$srcAddr, Mask,
                                        _.info512.ImmAllZerosV)),
                         (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmkz)
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

def : Pat<(_.info128.VT (extract_subvector
                         (_.info512.VT (masked_load addr:$srcAddr, Mask,
                                        (_.info512.VT (insert_subvector undef,
                                              (_.info128.VT (X86vzmovl _.info128.RC:$src)),
                                              (iPTR 0))))),
                         (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

}
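// Note on the INSERT_SUBREG/COPY_TO_REGCLASS idiom used above: the incoming
// mask lives in a GR8/GR16 register, so it is first widened to a full GR32
// by inserting it into an IMPLICIT_DEF at the given subregister index, and
// the result is then retyped to the VK1WM mask class. Only bit 0 of the mask
// is consumed by the one-lane move.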
// This matches the more recent codegen from clang that avoids emitting a
// 512-bit masked load directly. Codegen will widen 128-bit masked load to
// 512 bits on AVX512F-only targets.
multiclass avx512_load_scalar_lowering_subreg2<string InstrStr,
                                               AVX512VLVectorVTInfo _,
                                               dag Mask512, dag Mask128,
                                               RegisterClass MaskRC,
                                               SubRegIndex subreg> {
// AVX512F patterns.
def : Pat<(_.info128.VT (extract_subvector
                         (_.info512.VT (masked_load addr:$srcAddr, Mask512,
                                        _.info512.ImmAllZerosV)),
                         (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmkz)
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

def : Pat<(_.info128.VT (extract_subvector
                         (_.info512.VT (masked_load addr:$srcAddr, Mask512,
                                        (_.info512.VT (insert_subvector undef,
                                              (_.info128.VT (X86vzmovl _.info128.RC:$src)),
                                              (iPTR 0))))),
                         (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

// AVX512VL patterns.
def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
                         _.info128.ImmAllZerosV)),
          (!cast<Instruction>(InstrStr#rmkz)
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
                         (_.info128.VT (X86vzmovl _.info128.RC:$src)))),
          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;
}

defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;

defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;

defm : avx512_store_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (insert_subvector
                           (v16i1 immAllZerosV),
                           (v4i1 (extract_subvector
                                  (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                                  (iPTR 0))),
                           (iPTR 0))),
                   (v4i1 (extract_subvector
                          (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                          (iPTR 0))), GR8, sub_8bit>;
defm : avx512_store_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1
                    (extract_subvector
                     (v16i1
                      (insert_subvector
                       (v16i1 immAllZerosV),
                       (v2i1 (extract_subvector
                              (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                              (iPTR 0))),
                       (iPTR 0))),
                     (iPTR 0))),
                   (v2i1 (extract_subvector
                          (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                          (iPTR 0))), GR8, sub_8bit>;

defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;

defm : avx512_load_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (insert_subvector
                           (v16i1 immAllZerosV),
                           (v4i1 (extract_subvector
                                  (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                                  (iPTR 0))),
                           (iPTR 0))),
                   (v4i1 (extract_subvector
                          (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                          (iPTR 0))), GR8, sub_8bit>;
defm : avx512_load_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1
                    (extract_subvector
                     (v16i1
                      (insert_subvector
                       (v16i1 immAllZerosV),
                       (v2i1 (extract_subvector
                              (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                              (iPTR 0))),
                       (iPTR 0))),
                     (iPTR 0))),
                   (v2i1 (extract_subvector
                          (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                          (iPTR 0))), GR8, sub_8bit>;

def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrk
           (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
           VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
           (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;

def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), fp32imm0)),
          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrkz VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
           (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;

def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), (f32 FR32X:$src0))),
          (COPY_TO_REGCLASS
           (v4f32 (VMOVSSZrmk (v4f32 (COPY_TO_REGCLASS FR32X:$src0, VR128X)),
                              VK1WM:$mask, addr:$src)),
           FR32X)>;
def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), fp32imm0)),
          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrmkz VK1WM:$mask, addr:$src)), FR32X)>;

def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrk
           (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
           VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
           (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;

def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fp64imm0)),
          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrkz VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
           (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;

def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), (f64 FR64X:$src0))),
          (COPY_TO_REGCLASS
           (v2f64 (VMOVSDZrmk (v2f64 (COPY_TO_REGCLASS FR64X:$src0, VR128X)),
                              VK1WM:$mask, addr:$src)),
           FR64X)>;
def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), fp64imm0)),
          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrmkz VK1WM:$mask, addr:$src)), FR64X)>;
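// Editorial note on the *_REV definitions below: they encode the
// register-to-register moves with the operands reversed (MRMDestReg form).
// They are isCodeGenOnly/ForceDisassemble, so the compiler never selects
// them, but the disassembler can decode the alternate encoding and the
// assembler can reach it through the ".s" aliases that follow; FoldGenData
// links each reversed form back to its canonical twin.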
let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
  def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                            (ins VR128X:$src1, VR128X:$src2),
                            "vmovss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                            []>, XS, EVEX_4V, VEX_LIG,
                            FoldGenData<"VMOVSSZrr">,
                            Sched<[SchedWriteFShuffle.XMM]>;

  let Constraints = "$src0 = $dst" in
  def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                             (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask,
                                  VR128X:$src1, VR128X:$src2),
                             "vmovss\t{$src2, $src1, $dst {${mask}}|"#
                               "$dst {${mask}}, $src1, $src2}",
                             []>, EVEX_K, XS, EVEX_4V, VEX_LIG,
                             FoldGenData<"VMOVSSZrrk">,
                             Sched<[SchedWriteFShuffle.XMM]>;

  def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                              (ins f32x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
                              "vmovss\t{$src2, $src1, $dst {${mask}} {z}|"#
                                "$dst {${mask}} {z}, $src1, $src2}",
                              []>, EVEX_KZ, XS, EVEX_4V, VEX_LIG,
                              FoldGenData<"VMOVSSZrrkz">,
                              Sched<[SchedWriteFShuffle.XMM]>;

  def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                            (ins VR128X:$src1, VR128X:$src2),
                            "vmovsd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                            []>, XD, EVEX_4V, VEX_LIG, VEX_W,
                            FoldGenData<"VMOVSDZrr">,
                            Sched<[SchedWriteFShuffle.XMM]>;

  let Constraints = "$src0 = $dst" in
  def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                             (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask,
                                  VR128X:$src1, VR128X:$src2),
                             "vmovsd\t{$src2, $src1, $dst {${mask}}|"#
                               "$dst {${mask}}, $src1, $src2}",
                             []>, EVEX_K, XD, EVEX_4V, VEX_LIG,
                             VEX_W, FoldGenData<"VMOVSDZrrk">,
                             Sched<[SchedWriteFShuffle.XMM]>;

  def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                              (ins f64x_info.KRCWM:$mask, VR128X:$src1,
                                   VR128X:$src2),
                              "vmovsd\t{$src2, $src1, $dst {${mask}} {z}|"#
                                "$dst {${mask}} {z}, $src1, $src2}",
                              []>, EVEX_KZ, XD, EVEX_4V, VEX_LIG,
                              VEX_W, FoldGenData<"VMOVSDZrrkz">,
                              Sched<[SchedWriteFShuffle.XMM]>;
}

def : InstAlias<"vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VMOVSSZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}}|"#
                  "$dst {${mask}}, $src1, $src2}",
                (VMOVSSZrrk_REV VR128X:$dst, VK1WM:$mask,
                                VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"#
                  "$dst {${mask}} {z}, $src1, $src2}",
                (VMOVSSZrrkz_REV VR128X:$dst, VK1WM:$mask,
                                 VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VMOVSDZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}}|"#
                  "$dst {${mask}}, $src1, $src2}",
                (VMOVSDZrrk_REV VR128X:$dst, VK1WM:$mask,
                                VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"#
                  "$dst {${mask}} {z}, $src1, $src2}",
                (VMOVSDZrrkz_REV VR128X:$dst, VK1WM:$mask,
                                 VR128X:$src1, VR128X:$src2), 0>;

let Predicates = [HasAVX512, OptForSize] in {
  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
            (VMOVSSZrr (v4f32 (AVX512_128_SET0)), VR128X:$src)>;
  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
            (VMOVSSZrr (v4i32 (AVX512_128_SET0)), VR128X:$src)>;

  // Move low f32 and clear high bits.
  def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
              (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))), sub_xmm)>;
  def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
              (v4i32 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)))), sub_xmm)>;

  def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
              (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))), sub_xmm)>;
  def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
              (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)))), sub_xmm)>;
}

// Use 128-bit blends for OptForSpeed since BLENDs have better throughput than
// VMOVSS/SD. Unfortunately, this loses the ability to use XMM16-31.
let Predicates = [HasAVX512, OptForSpeed] in {
  def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VBLENDPSrri (v4f32 (V_SET0)),
                     (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)),
                     (i8 1))), sub_xmm)>;
  def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VPBLENDWrri (v4i32 (V_SET0)),
                     (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)),
                     (i8 3))), sub_xmm)>;
}

let Predicates = [HasAVX512] in {
  def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
            (VMOVSSZrm addr:$src)>;
  def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
            (VMOVSDZrm addr:$src)>;

  // Represent the same patterns as above, but in the form they appear for
  // 256-bit types.
  def : Pat<(v8f32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v4f64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;

  // Represent the same patterns as above, but in the form they appear for
  // 512-bit types.
  def : Pat<(v16f32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v8f64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
}

let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
                                  (ins VR128X:$src),
                                  "vmovq\t{$src, $dst|$dst, $src}",
                                  [(set VR128X:$dst, (v2i64 (X86vzmovl
                                                     (v2i64 VR128X:$src))))]>,
                                  EVEX, VEX_W;
}

let Predicates = [HasAVX512] in {
  def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
            (VMOVDI2PDIZrr GR32:$src)>;

  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
            (VMOV64toPQIZrr GR64:$src)>;

  // AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
            (VMOVDI2PDIZrm addr:$src)>;
  def : Pat<(v4i32 (X86vzload32 addr:$src)),
            (VMOVDI2PDIZrm addr:$src)>;
  def : Pat<(v8i32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
  def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
            (VMOVZPQILo2PQIZrr VR128X:$src)>;
  def : Pat<(v2i64 (X86vzload64 addr:$src)),
            (VMOVQI2PQIZrm addr:$src)>;
  def : Pat<(v4i64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;

  // Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
  def : Pat<(v16i32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
  def : Pat<(v8i64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;

  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2f64 (VMOVZPQILo2PQIZrr
                     (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))),
             sub_xmm)>;
  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2i64 (VMOVZPQILo2PQIZrr
                     (v2i64 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)))),
             sub_xmm)>;

  def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2f64 (VMOVZPQILo2PQIZrr
                     (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))),
             sub_xmm)>;
  def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2i64 (VMOVZPQILo2PQIZrr
                     (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)))),
             sub_xmm)>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - Non-temporals
//===----------------------------------------------------------------------===//

def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
                            (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
                            [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.ZMM.RM]>,
                            EVEX, T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>;

let Predicates = [HasVLX] in {
  def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
                                 (ins i256mem:$src),
                                 "vmovntdqa\t{$src, $dst|$dst, $src}",
                                 [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.YMM.RM]>,
                                 EVEX, T8PD, EVEX_V256, EVEX_CD8<64, CD8VF>;

  def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
                                 (ins i128mem:$src),
                                 "vmovntdqa\t{$src, $dst|$dst, $src}",
                                 [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.XMM.RM]>,
                                 EVEX, T8PD, EVEX_V128, EVEX_CD8<64, CD8VF>;
}

multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                        X86SchedWriteMoveLS Sched,
                        PatFrag st_frag = alignednontemporalstore> {
  let SchedRW = [Sched.MR], AddedComplexity = 400 in
  def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(st_frag (_.VT _.RC:$src), addr:$dst)],
                    _.ExeDomain>, EVEX, EVEX_CD8<_.EltSize, CD8VF>;
}

multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
                           AVX512VLVectorVTInfo VTInfo,
                           X86SchedWriteMoveLSWidths Sched> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512, Sched.ZMM>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256, Sched.YMM>, EVEX_V256;
    defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128, Sched.XMM>, EVEX_V128;
  }
}

defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info,
                                SchedWriteVecMoveLSNT>, PD;
defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info,
                                SchedWriteFMoveLSNT>, PD, VEX_W;
defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info,
                                SchedWriteFMoveLSNT>, PS;
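// Usage note (editorial): the non-temporal stores above and the vmovntdqa
// loads matched below require naturally aligned operands, which is why only
// the aligned non-temporal load/store PatFrags are selected; an unaligned
// non-temporal access falls back to ordinary moves.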
let Predicates = [HasAVX512], AddedComplexity = 400 in {
  def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
  def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
  def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;

  def : Pat<(v8f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v16f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v8i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v16i32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v32i16 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v64i8 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
}

let Predicates = [HasVLX], AddedComplexity = 400 in {
  def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignednontemporalstore (v16i16 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignednontemporalstore (v32i8 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;

  def : Pat<(v4f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v8f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v4i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v8i32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v16i16 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v32i8 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;

  def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignednontemporalstore (v8i16 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignednontemporalstore (v16i8 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;

  def : Pat<(v2f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v4f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v2i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v4i32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v8i16 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v16i8 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - Integer arithmetic
//
multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86VectorVTInfo _, X86FoldableSchedWrite sched,
                           bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
                            (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                            IsCommutable, IsCommutable>, AVX512BIBase, EVEX_4V,
                            Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                            (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
                            (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2)))>,
                            AVX512BIBase, EVEX_4V,
                            Sched<[sched.Folded, sched.ReadAfterFold]>;
}
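// For orientation (editorial sketch): a hypothetical instantiation such as
//   defm VPADDD : avx512_binop_rm<0xFE, "vpaddd", add, v16i32_info, sched>;
// would produce VPADDDrr/VPADDDrm plus the masked rrk/rrkz/rmk/rmkz variants
// generated by AVX512_maskable. The real definitions below go through the
// *_vl wrappers instead, which additionally append the Z/Z256/Z128 width
// suffixes (e.g. VPADDDZrr).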
multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
                            bit IsCommutable = 0> :
           avx512_binop_rm<opc, OpcodeStr, OpNode, _, sched, IsCommutable> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                             (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                             "${src2}"##_.BroadcastStr##", $src1",
                             "$src1, ${src2}"##_.BroadcastStr,
                             (_.VT (OpNode _.RC:$src1,
                                    (_.BroadcastLdFrag addr:$src2)))>,
                             AVX512BIBase, EVEX_4V, EVEX_B,
                             Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              AVX512VLVectorVTInfo VTInfo,
                              X86SchedWriteWidths sched, Predicate prd,
                              bit IsCommutable = 0> {
  let Predicates = [prd] in
  defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
                           IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256,
                                sched.YMM, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128,
                                sched.XMM, IsCommutable>, EVEX_V128;
  }
}

multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               AVX512VLVectorVTInfo VTInfo,
                               X86SchedWriteWidths sched, Predicate prd,
                               bit IsCommutable = 0> {
  let Predicates = [prd] in
  defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
                            IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256,
                                 sched.YMM, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128,
                                 sched.XMM, IsCommutable>, EVEX_V128;
  }
}

multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
                                  sched, prd, IsCommutable>,
                                  VEX_W, EVEX_CD8<64, CD8VF>;
}

multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
                                  sched, prd, IsCommutable>, EVEX_CD8<32, CD8VF>;
}

multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info,
                                 sched, prd, IsCommutable>, EVEX_CD8<16, CD8VF>,
                                 VEX_WIG;
}

multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info,
                                 sched, prd, IsCommutable>, EVEX_CD8<8, CD8VF>,
                                 VEX_WIG;
}

multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd, bit IsCommutable = 0> {
  defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, sched, prd,
                                IsCommutable>;

  defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, sched, prd,
                                IsCommutable>;
}
multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd, bit IsCommutable = 0> {
  defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, sched, prd,
                                IsCommutable>;

  defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode, sched, prd,
                                IsCommutable>;
}

multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
                                  bits<8> opc_d, bits<8> opc_q,
                                  string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched,
                                  bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
                                    sched, HasAVX512, IsCommutable>,
              avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
                                    sched, HasBWI, IsCommutable>;
}

multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched,
                            SDNode OpNode,X86VectorVTInfo _Src,
                            X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct,
                            bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
                            "$src2, $src1","$src1, $src2",
                            (_Dst.VT (OpNode
                                      (_Src.VT _Src.RC:$src1),
                                      (_Src.VT _Src.RC:$src2))),
                            IsCommutable>,
                            AVX512BIBase, EVEX_4V, Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
                            (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
                                             (_Src.LdFrag addr:$src2)))>,
                            AVX512BIBase, EVEX_4V,
                            Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                             (ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
                             OpcodeStr,
                             "${src2}"##_Brdct.BroadcastStr##", $src1",
                             "$src1, ${src2}"##_Brdct.BroadcastStr,
                             (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
                              (_Brdct.VT (_Brdct.BroadcastLdFrag addr:$src2)))))>,
                             AVX512BIBase, EVEX_4V, EVEX_B,
                             Sched<[sched.Folded, sched.ReadAfterFold]>;
}

defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
                                    SchedWriteVecALU, 1>;
defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
                                    SchedWriteVecALU, 0>;
defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", saddsat,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", ssubsat,
                                    SchedWriteVecALU, HasBWI, 0>;
defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", uaddsat,
                                     SchedWriteVecALU, HasBWI, 1>;
defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", usubsat,
                                     SchedWriteVecALU, HasBWI, 0>;
defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
                                    SchedWritePMULLD, HasAVX512, 1>, T8PD;
defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
                                    SchedWriteVecIMul, HasBWI, 1>;
defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
                                    SchedWriteVecIMul, HasDQI, 1>, T8PD,
                                    NotEVEX2VEXConvertible;
defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SchedWriteVecIMul,
                                    HasBWI, 1>;
defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SchedWriteVecIMul,
                                     HasBWI, 1>;
defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs,
                                      SchedWriteVecIMul, HasBWI, 1>, T8PD;
defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", X86avg,
                                   SchedWriteVecALU, HasBWI, 1>;
defm VPMULDQ : avx512_binop_rm_vl_q<0x28, "vpmuldq", X86pmuldq,
                                    SchedWriteVecIMul, HasAVX512, 1>, T8PD;
defm VPMULUDQ : avx512_binop_rm_vl_q<0xF4, "vpmuludq", X86pmuludq,
                                     SchedWriteVecIMul, HasAVX512, 1>;

multiclass avx512_binop_all<bits<8> opc, string OpcodeStr,
                            X86SchedWriteWidths sched,
                            AVX512VLVectorVTInfo _SrcVTInfo,
                            AVX512VLVectorVTInfo _DstVTInfo,
                            SDNode OpNode, Predicate prd, bit IsCommutable = 0> {
  let Predicates = [prd] in
  defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
                                 _SrcVTInfo.info512, _DstVTInfo.info512,
                                 v8i64_info, IsCommutable>,
                                 EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
  let Predicates = [HasVLX, prd] in {
    defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
                                      _SrcVTInfo.info256, _DstVTInfo.info256,
                                      v4i64x_info, IsCommutable>,
                                      EVEX_V256, EVEX_CD8<64, CD8VF>, VEX_W;
    defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
                                      _SrcVTInfo.info128, _DstVTInfo.info128,
                                      v2i64x_info, IsCommutable>,
                                      EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_W;
  }
}

defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SchedWriteVecALU,
                                       avx512vl_i8_info, avx512vl_i8_info,
                                       X86multishift, HasVBMI, 0>, T8PD;

multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86VectorVTInfo _Src, X86VectorVTInfo _Dst,
                            X86FoldableSchedWrite sched> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                             (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
                             OpcodeStr,
                             "${src2}"##_Src.BroadcastStr##", $src1",
                             "$src1, ${src2}"##_Src.BroadcastStr,
                             (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
                              (_Src.VT (_Src.BroadcastLdFrag addr:$src2)))))>,
                             EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>,
                             Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
                           SDNode OpNode,X86VectorVTInfo _Src,
                           X86VectorVTInfo _Dst, X86FoldableSchedWrite sched,
                           bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
                            "$src2, $src1","$src1, $src2",
                            (_Dst.VT (OpNode
                                      (_Src.VT _Src.RC:$src1),
                                      (_Src.VT _Src.RC:$src2))),
                            IsCommutable, IsCommutable>,
                            EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V, Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
                            (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
                                             (_Src.LdFrag addr:$src2)))>,
                            EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>,
                            Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode> {
  let Predicates = [HasBWI] in
  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i32_info,
                                v32i16_info, SchedWriteShuffle.ZMM>,
                avx512_packs_rmb<opc, OpcodeStr, OpNode, v16i32_info,
                                 v32i16_info, SchedWriteShuffle.ZMM>, EVEX_V512;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i32x_info,
                                     v16i16x_info, SchedWriteShuffle.YMM>,
                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v8i32x_info,
                                      v16i16x_info, SchedWriteShuffle.YMM>,
                                      EVEX_V256;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v4i32x_info,
                                     v8i16x_info, SchedWriteShuffle.XMM>,
                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v4i32x_info,
                                      v8i16x_info, SchedWriteShuffle.XMM>,
                                      EVEX_V128;
  }
}

multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
                                   SDNode OpNode> {
  let Predicates = [HasBWI] in
  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info, v64i8_info,
                                SchedWriteShuffle.ZMM>, EVEX_V512, VEX_WIG;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info,
                                     v32i8x_info, SchedWriteShuffle.YMM>,
                                     EVEX_V256, VEX_WIG;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info,
                                     v16i8x_info, SchedWriteShuffle.XMM>,
                                     EVEX_V128, VEX_WIG;
  }
}

multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr,
                         SDNode OpNode, AVX512VLVectorVTInfo _Src,
                         AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> {
  let Predicates = [HasBWI] in
  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512,
                                _Dst.info512, SchedWriteVecIMul.ZMM,
                                IsCommutable>, EVEX_V512;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256,
                                     _Dst.info256, SchedWriteVecIMul.YMM,
                                     IsCommutable>, EVEX_V256;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128,
                                     _Dst.info128, SchedWriteVecIMul.XMM,
                                     IsCommutable>, EVEX_V128;
  }
}

defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, AVX512BIBase;
defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, AVX5128IBase;
defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase;
defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase;

defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
                     avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8PD, VEX_WIG;
defm VPMADDWD : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
                     avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase, VEX_WIG;

defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax,
                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMAXSD : avx512_binop_rm_vl_d<0x3D, "vpmaxsd", smax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
defm VPMAXSQ : avx512_binop_rm_vl_q<0x3D, "vpmaxsq", smax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
                                    NotEVEX2VEXConvertible;

defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax,
                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMAXUD : avx512_binop_rm_vl_d<0x3F, "vpmaxud", umax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
defm VPMAXUQ : avx512_binop_rm_vl_q<0x3F, "vpmaxuq", umax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
                                    NotEVEX2VEXConvertible;

defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin,
                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMINSD : avx512_binop_rm_vl_d<0x39, "vpminsd", smin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
defm VPMINSQ : avx512_binop_rm_vl_q<0x39, "vpminsq", smin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
                                    NotEVEX2VEXConvertible;

defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin,
                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMINUD : avx512_binop_rm_vl_d<0x3B, "vpminud", umin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
defm VPMINUQ : avx512_binop_rm_vl_q<0x3B, "vpminuq", umin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
                                    NotEVEX2VEXConvertible;

// PMULLQ: use the 512-bit version to implement the 128/256-bit forms when
// VLX is not available.
let Predicates = [HasDQI, NoVLX] in {
  def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG
                (VPMULLQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
             sub_ymm)>;
  def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
            (EXTRACT_SUBREG
                (VPMULLQZrmb
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    addr:$src2),
             sub_ymm)>;

  def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG
                (VPMULLQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
             sub_xmm)>;
  def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
            (EXTRACT_SUBREG
                (VPMULLQZrmb
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    addr:$src2),
             sub_xmm)>;
}

multiclass avx512_min_max_lowering<string Instr, SDNode OpNode> {
  def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)),
            (EXTRACT_SUBREG
                (!cast<Instruction>(Instr#"rr")
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
             sub_ymm)>;
  def : Pat<(v4i64 (OpNode (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
            (EXTRACT_SUBREG
                (!cast<Instruction>(Instr#"rmb")
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    addr:$src2),
             sub_ymm)>;

  def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)),
            (EXTRACT_SUBREG
                (!cast<Instruction>(Instr#"rr")
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
             sub_xmm)>;
  def : Pat<(v2i64 (OpNode (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
            (EXTRACT_SUBREG
                (!cast<Instruction>(Instr#"rmb")
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    addr:$src2),
             sub_xmm)>;
}

let Predicates = [HasAVX512, NoVLX] in {
  defm : avx512_min_max_lowering<"VPMAXUQZ", umax>;
  defm : avx512_min_max_lowering<"VPMINUQZ", umin>;
  defm : avx512_min_max_lowering<"VPMAXSQZ", smax>;
  defm : avx512_min_max_lowering<"VPMINSQZ", smin>;
}

//===----------------------------------------------------------------------===//
// AVX-512 Logical Instructions
//===----------------------------------------------------------------------===//

defm VPAND : avx512_binop_rm_vl_dq<0xDB, 0xDB, "vpand", and,
                                   SchedWriteVecLogic, HasAVX512, 1>;
defm VPOR : avx512_binop_rm_vl_dq<0xEB, 0xEB, "vpor", or,
                                  SchedWriteVecLogic, HasAVX512, 1>;
defm VPXOR : avx512_binop_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
                                   SchedWriteVecLogic, HasAVX512, 1>;
defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
                                    SchedWriteVecLogic, HasAVX512>;
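// Editorial note: bitwise logic is element-size agnostic, so the byte/word
// patterns below simply reuse the 64-bit-element "Q" instructions defined
// above; only the i32/i64 flavors of VPAND/VPOR/VPXOR/VPANDN exist as EVEX
// encodings.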
let Predicates = [HasVLX] in {
  def : Pat<(v16i8 (and VR128X:$src1, VR128X:$src2)),
            (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (and VR128X:$src1, VR128X:$src2)),
            (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(v16i8 (or VR128X:$src1, VR128X:$src2)),
            (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (or VR128X:$src1, VR128X:$src2)),
            (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(v16i8 (xor VR128X:$src1, VR128X:$src2)),
            (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (xor VR128X:$src1, VR128X:$src2)),
            (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(v16i8 (X86andnp VR128X:$src1, VR128X:$src2)),
            (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (X86andnp VR128X:$src1, VR128X:$src2)),
            (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(and VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(and VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPANDQZ128rm VR128X:$src1, addr:$src2)>;

  def : Pat<(or VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPORQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(or VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPORQZ128rm VR128X:$src1, addr:$src2)>;

  def : Pat<(xor VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(xor VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPXORQZ128rm VR128X:$src1, addr:$src2)>;

  def : Pat<(X86andnp VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;

  def : Pat<(v32i8 (and VR256X:$src1, VR256X:$src2)),
            (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (and VR256X:$src1, VR256X:$src2)),
            (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;

  def : Pat<(v32i8 (or VR256X:$src1, VR256X:$src2)),
            (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (or VR256X:$src1, VR256X:$src2)),
            (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;

  def : Pat<(v32i8 (xor VR256X:$src1, VR256X:$src2)),
            (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (xor VR256X:$src1, VR256X:$src2)),
            (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;

  def : Pat<(v32i8 (X86andnp VR256X:$src1, VR256X:$src2)),
            (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (X86andnp VR256X:$src1, VR256X:$src2)),
            (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;

  def : Pat<(and VR256X:$src1, (loadv32i8 addr:$src2)),
            (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(and VR256X:$src1, (loadv16i16 addr:$src2)),
            (VPANDQZ256rm VR256X:$src1, addr:$src2)>;

  def : Pat<(or VR256X:$src1, (loadv32i8 addr:$src2)),
            (VPORQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(or VR256X:$src1, (loadv16i16 addr:$src2)),
            (VPORQZ256rm VR256X:$src1, addr:$src2)>;

  def : Pat<(xor VR256X:$src1, (loadv32i8 addr:$src2)),
            (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(xor VR256X:$src1, (loadv16i16 addr:$src2)),
            (VPXORQZ256rm VR256X:$src1, addr:$src2)>;

  def : Pat<(X86andnp VR256X:$src1, (loadv32i8 addr:$src2)),
            (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR256X:$src1, (loadv16i16 addr:$src2)),
            (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
}
let Predicates = [HasAVX512] in {
  def : Pat<(v64i8 (and VR512:$src1, VR512:$src2)),
            (VPANDQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v32i16 (and VR512:$src1, VR512:$src2)),
            (VPANDQZrr VR512:$src1, VR512:$src2)>;

  def : Pat<(v64i8 (or VR512:$src1, VR512:$src2)),
            (VPORQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v32i16 (or VR512:$src1, VR512:$src2)),
            (VPORQZrr VR512:$src1, VR512:$src2)>;

  def : Pat<(v64i8 (xor VR512:$src1, VR512:$src2)),
            (VPXORQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v32i16 (xor VR512:$src1, VR512:$src2)),
            (VPXORQZrr VR512:$src1, VR512:$src2)>;

  def : Pat<(v64i8 (X86andnp VR512:$src1, VR512:$src2)),
            (VPANDNQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v32i16 (X86andnp VR512:$src1, VR512:$src2)),
            (VPANDNQZrr VR512:$src1, VR512:$src2)>;

  def : Pat<(and VR512:$src1, (loadv64i8 addr:$src2)),
            (VPANDQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(and VR512:$src1, (loadv32i16 addr:$src2)),
            (VPANDQZrm VR512:$src1, addr:$src2)>;

  def : Pat<(or VR512:$src1, (loadv64i8 addr:$src2)),
            (VPORQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(or VR512:$src1, (loadv32i16 addr:$src2)),
            (VPORQZrm VR512:$src1, addr:$src2)>;

  def : Pat<(xor VR512:$src1, (loadv64i8 addr:$src2)),
            (VPXORQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(xor VR512:$src1, (loadv32i16 addr:$src2)),
            (VPXORQZrm VR512:$src1, addr:$src2)>;

  def : Pat<(X86andnp VR512:$src1, (loadv64i8 addr:$src2)),
            (VPANDNQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR512:$src1, (loadv32i16 addr:$src2)),
            (VPANDNQZrm VR512:$src1, addr:$src2)>;
}

// Patterns to catch vselect with a different type than the logic op.
multiclass avx512_logical_lowering<string InstrStr, SDNode OpNode,
                                   X86VectorVTInfo _,
                                   X86VectorVTInfo IntInfo> {
  // Masked register-register logical operations.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
             _.RC:$src1, _.RC:$src2)>;

  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
             _.RC:$src2)>;

  // Masked register-memory logical operations.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
                                            (load addr:$src2)))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
             _.RC:$src1, addr:$src2)>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
                                            (load addr:$src2)))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1,
             addr:$src2)>;
}
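// Illustrative sketch (editorial): the lowering above catches DAGs such as
//   (v8i64 (vselect K, (bitconvert (v16i32 (and ...))), Src0))
// where the masked select is applied at i64 granularity to a logic op that
// was performed on i32 lanes; the masked VPANDQ form can be used directly
// because the bitwise result is identical regardless of element size.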
multiclass avx512_logical_lowering_bcast<string InstrStr, SDNode OpNode,
                                         X86VectorVTInfo _,
                                         X86VectorVTInfo IntInfo> {
  // Register-broadcast logical operations.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (bitconvert
                    (IntInfo.VT (OpNode _.RC:$src1,
                                 (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
             _.RC:$src1, addr:$src2)>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (bitconvert
                    (IntInfo.VT (OpNode _.RC:$src1,
                                 (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rmbkz) _.KRCWM:$mask,
             _.RC:$src1, addr:$src2)>;
}

multiclass avx512_logical_lowering_sizes<string InstrStr, SDNode OpNode,
                                         AVX512VLVectorVTInfo SelectInfo,
                                         AVX512VLVectorVTInfo IntInfo> {
let Predicates = [HasVLX] in {
  defm : avx512_logical_lowering<InstrStr#"Z128", OpNode, SelectInfo.info128,
                                 IntInfo.info128>;
  defm : avx512_logical_lowering<InstrStr#"Z256", OpNode, SelectInfo.info256,
                                 IntInfo.info256>;
}
let Predicates = [HasAVX512] in {
  defm : avx512_logical_lowering<InstrStr#"Z", OpNode, SelectInfo.info512,
                                 IntInfo.info512>;
}
}

multiclass avx512_logical_lowering_sizes_bcast<string InstrStr, SDNode OpNode,
                                               AVX512VLVectorVTInfo SelectInfo,
                                               AVX512VLVectorVTInfo IntInfo> {
let Predicates = [HasVLX] in {
  defm : avx512_logical_lowering_bcast<InstrStr#"Z128", OpNode,
                                       SelectInfo.info128, IntInfo.info128>;
  defm : avx512_logical_lowering_bcast<InstrStr#"Z256", OpNode,
                                       SelectInfo.info256, IntInfo.info256>;
}
let Predicates = [HasAVX512] in {
  defm : avx512_logical_lowering_bcast<InstrStr#"Z", OpNode,
                                       SelectInfo.info512, IntInfo.info512>;
}
}

multiclass avx512_logical_lowering_types<string InstrStr, SDNode OpNode> {
  // i64 vselect with i32/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
                                       avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
                                       avx512vl_i8_info>;

  // i32 vselect with i64/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
                                       avx512vl_i64_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
                                       avx512vl_i8_info>;

  // f32 vselect with i64/i32/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i64_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i8_info>;

  // f64 vselect with i64/i32/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i64_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i8_info>;
  defm : avx512_logical_lowering_sizes_bcast<InstrStr#"D", OpNode,
                                             avx512vl_f32_info,
                                             avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes_bcast<InstrStr#"Q", OpNode,
                                             avx512vl_f64_info,
                                             avx512vl_i64_info>;
}

defm : avx512_logical_lowering_types<"VPAND", and>;
defm : avx512_logical_lowering_types<"VPOR", or>;
defm : avx512_logical_lowering_types<"VPXOR", xor>;
defm : avx512_logical_lowering_types<"VPANDN", X86andnp>;

//===----------------------------------------------------------------------===//
// AVX-512 FP arithmetic
//===----------------------------------------------------------------------===//

multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                            SDNode OpNode, SDNode VecNode,
                            X86FoldableSchedWrite sched, bit IsCommutable> {
  let ExeDomain = _.ExeDomain in {
  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
                           Sched<[sched]>;

  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (_.VT (VecNode _.RC:$src1,
                                        _.ScalarIntMemCPat:$src2))>,
                         Sched<[sched.Folded, sched.ReadAfterFold]>;
  let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.FRC:$src2),
                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                          [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
                          Sched<[sched]> {
    let isCommutable = IsCommutable;
  }
  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                          [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                                         (_.ScalarLdFrag addr:$src2)))]>,
                          Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
  }
}

multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                                  SDNode VecNode, X86FoldableSchedWrite sched,
                                  bit IsCommutable = 0> {
  let ExeDomain = _.ExeDomain in
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                          (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
                          "$rc, $src2, $src1", "$src1, $src2, $rc",
                          (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                          (i32 timm:$rc))>,
                          EVEX_B, EVEX_RC, Sched<[sched]>;
}
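// Usage note (editorial): the rrb_Int variants above take the extra AVX512RC
// operand for static, per-instruction rounding control, which surfaces in
// assembly as an embedded rounding suffix, e.g. something like
//   vaddss {rd-sae}, %xmm2, %xmm1, %xmm0      (AT&T syntax)
// Embedded rounding is only encodable on register-register forms, which is
// why no memory variant is defined for it.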
_.FRC:$src2))]>, 5346 Sched<[sched]>, 5347 EVEX2VEXOverride<EVEX2VexOvrd#"rr"> { 5348 let isCommutable = IsCommutable; 5349 } 5350 def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst), 5351 (ins _.FRC:$src1, _.ScalarMemOp:$src2), 5352 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", 5353 [(set _.FRC:$dst, (OpNode _.FRC:$src1, 5354 (_.ScalarLdFrag addr:$src2)))]>, 5355 Sched<[sched.Folded, sched.ReadAfterFold]>, 5356 EVEX2VEXOverride<EVEX2VexOvrd#"rm">; 5357 } 5358 5359 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 5360 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 5361 "{sae}, $src2, $src1", "$src1, $src2, {sae}", 5362 (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>, 5363 EVEX_B, Sched<[sched]>; 5364 } 5365} 5366 5367multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDNode OpNode, 5368 SDNode VecNode, SDNode RndNode, 5369 X86SchedWriteSizes sched, bit IsCommutable> { 5370 defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode, 5371 sched.PS.Scl, IsCommutable>, 5372 avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, RndNode, 5373 sched.PS.Scl, IsCommutable>, 5374 XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>; 5375 defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode, 5376 sched.PD.Scl, IsCommutable>, 5377 avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, RndNode, 5378 sched.PD.Scl, IsCommutable>, 5379 XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>; 5380} 5381 5382multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode, 5383 SDNode VecNode, SDNode SaeNode, 5384 X86SchedWriteSizes sched, bit IsCommutable> { 5385 defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode, 5386 VecNode, SaeNode, sched.PS.Scl, IsCommutable, 5387 NAME#"SS">, 5388 XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>; 5389 defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode, 5390 VecNode, SaeNode, sched.PD.Scl, IsCommutable, 5391 NAME#"SD">, 5392 XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>; 5393} 5394defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86fadds, X86faddRnds, 5395 SchedWriteFAddSizes, 1>; 5396defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmuls, X86fmulRnds, 5397 SchedWriteFMulSizes, 1>; 5398defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubs, X86fsubRnds, 5399 SchedWriteFAddSizes, 0>; 5400defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivs, X86fdivRnds, 5401 SchedWriteFDivSizes, 0>; 5402defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminSAEs, 5403 SchedWriteFCmpSizes, 0>; 5404defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxSAEs, 5405 SchedWriteFCmpSizes, 0>; 5406 5407// MIN/MAX nodes are commutable under "unsafe-fp-math". 
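
// For reference, the rrb_Int forms above encode a static rounding mode in the
// EVEX prefix, e.g. (AT&T syntax, illustrative):
//   vaddss {rd-sae}, %xmm2, %xmm1, %xmm0
// which rounds the scalar addition toward minus infinity without changing
// MXCSR.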
// MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use
// X86fminc and X86fmaxc instead of X86fmin and X86fmax.
multiclass avx512_commutable_binop_s<bits<8> opc, string OpcodeStr,
                                     X86VectorVTInfo _, SDNode OpNode,
                                     X86FoldableSchedWrite sched,
                                     string EVEX2VEXOvrd> {
  let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
              (ins _.FRC:$src1, _.FRC:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
              Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr"> {
    let isCommutable = 1;
  }
  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
              (ins _.FRC:$src1, _.ScalarMemOp:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                                 (_.ScalarLdFrag addr:$src2)))]>,
              Sched<[sched.Folded, sched.ReadAfterFold]>,
              EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
  }
}
defm VMINCSSZ : avx512_commutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
                                          SchedWriteFCmp.Scl, "VMINCSS">, XS,
                                          EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;

defm VMINCSDZ : avx512_commutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
                                          SchedWriteFCmp.Scl, "VMINCSD">, XD,
                                          VEX_W, EVEX_4V, VEX_LIG,
                                          EVEX_CD8<64, CD8VT1>;

defm VMAXCSSZ : avx512_commutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
                                          SchedWriteFCmp.Scl, "VMAXCSS">, XS,
                                          EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;

defm VMAXCSDZ : avx512_commutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
                                          SchedWriteFCmp.Scl, "VMAXCSD">, XD,
                                          VEX_W, EVEX_4V, VEX_LIG,
                                          EVEX_CD8<64, CD8VT1>;

multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
                            bit IsCommutable,
                            bit IsKCommutable = IsCommutable> {
  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1, _.RC:$src2)), IsCommutable,
                  IsKCommutable, IsKCommutable>,
                  EVEX_4V, Sched<[sched]>;
  let mayLoad = 1 in {
    defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
                    "$src2, $src1", "$src1, $src2",
                    (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
                    EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
    defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                     (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
                     "${src2}"##_.BroadcastStr##", $src1",
                     "$src1, ${src2}"##_.BroadcastStr,
                     (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>,
                     EVEX_4V, EVEX_B,
                     Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
  }
}

multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr,
                                  SDPatternOperator OpNodeRnd,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr##_.Suffix,
                  "$rc, $src2, $src1", "$src1, $src2, $rc",
                  (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 timm:$rc)))>,
                  EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
}

multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr,
                                SDPatternOperator OpNodeSAE,
                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
                  "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                  (_.VT (OpNodeSAE _.RC:$src1, _.RC:$src2))>,
                  EVEX_4V, EVEX_B, Sched<[sched]>;
}

multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                             Predicate prd, X86SchedWriteSizes sched,
                             bit IsCommutable = 0,
                             bit IsPD128Commutable = IsCommutable> {
  let Predicates = [prd] in {
  defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v16f32_info,
                              sched.PS.ZMM, IsCommutable>, EVEX_V512, PS,
                              EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f64_info,
                              sched.PD.ZMM, IsCommutable>, EVEX_V512, PD, VEX_W,
                              EVEX_CD8<64, CD8VF>;
  }

  // Define only if AVX512VL feature is present.
  let Predicates = [prd, HasVLX] in {
  defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f32x_info,
                                 sched.PS.XMM, IsCommutable>, EVEX_V128, PS,
                                 EVEX_CD8<32, CD8VF>;
  defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f32x_info,
                                 sched.PS.YMM, IsCommutable>, EVEX_V256, PS,
                                 EVEX_CD8<32, CD8VF>;
  defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v2f64x_info,
                                 sched.PD.XMM, IsPD128Commutable,
                                 IsCommutable>, EVEX_V128, PD, VEX_W,
                                 EVEX_CD8<64, CD8VF>;
  defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f64x_info,
                                 sched.PD.YMM, IsCommutable>, EVEX_V256, PD, VEX_W,
                                 EVEX_CD8<64, CD8VF>;
  }
}

multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
                                   X86SchedWriteSizes sched> {
  defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
                                    v16f32_info>,
                                    EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
                                    v8f64_info>,
                                    EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
}

multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
                                 X86SchedWriteSizes sched> {
  defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
                                  v16f32_info>,
                                  EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
                                  v8f64_info>,
                                  EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
}

defm VADD : avx512_fp_binop_p<0x58, "vadd", fadd, HasAVX512,
                              SchedWriteFAddSizes, 1>,
            avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>;
defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, HasAVX512,
                              SchedWriteFMulSizes, 1>,
            avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>;
defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub, HasAVX512,
                              SchedWriteFAddSizes>,
            avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>;
defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv, HasAVX512,
                              SchedWriteFDivSizes>,
            avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>;
defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512,
                              SchedWriteFCmpSizes, 0>,
            avx512_fp_binop_p_sae<0x5D, "vmin", X86fminSAE, SchedWriteFCmpSizes>;
defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, HasAVX512,
                              SchedWriteFCmpSizes, 0>,
            avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxSAE, SchedWriteFCmpSizes>;
let isCodeGenOnly = 1 in {
  defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, HasAVX512,
                                 SchedWriteFCmpSizes, 1>;
  defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, HasAVX512,
                                 SchedWriteFCmpSizes, 1>;
}
defm VAND  : avx512_fp_binop_p<0x54, "vand", null_frag, HasDQI,
                               SchedWriteFLogicSizes, 1>;
defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, HasDQI,
                               SchedWriteFLogicSizes, 0>;
defm VOR   : avx512_fp_binop_p<0x56, "vor", null_frag, HasDQI,
                               SchedWriteFLogicSizes, 1>;
defm VXOR  : avx512_fp_binop_p<0x57, "vxor", null_frag, HasDQI,
                               SchedWriteFLogicSizes, 1>;
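
// Note that VAND/VANDN/VOR/VXOR above use null_frag: the selection patterns
// for packed FP logic are supplied separately, so that targets without DQI
// can still lower these operations to the equivalent integer
// VPAND/VPANDN/VPOR/VPXOR forms.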

multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
                  EVEX_4V, Sched<[sched]>;
  defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
                  EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
                   "${src2}"##_.BroadcastStr##", $src1",
                   "$src1, ${src2}"##_.BroadcastStr,
                   (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>,
                   EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
                  Sched<[sched]>;
  defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr##_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (OpNode _.RC:$src1, _.ScalarIntMemCPat:$src2)>,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr,
                                X86SchedWriteWidths sched> {
  defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v16f32_info>,
             avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v16f32_info>,
             EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v8f64_info>,
             avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v8f64_info>,
             EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
  defm SSZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f32x_info>,
             avx512_fp_scalar_round<opcScaler, OpcodeStr##"ss", f32x_info,
                                    X86scalefsRnd, sched.Scl>,
             EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
  defm SDZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f64x_info>,
             avx512_fp_scalar_round<opcScaler, OpcodeStr##"sd", f64x_info,
                                    X86scalefsRnd, sched.Scl>,
             EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>, VEX_W;

  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
  defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v4f32x_info>,
                EVEX_V128, EVEX_CD8<32, CD8VF>;
  defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v8f32x_info>,
                EVEX_V256, EVEX_CD8<32, CD8VF>;
  defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v2f64x_info>,
                EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
  defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v4f64x_info>,
                EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
  }
}
defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef",
                                    SchedWriteFAdd>, T8PD, NotEVEX2VEXConvertible;

//===----------------------------------------------------------------------===//
// AVX-512 VPTESTM instructions
//===----------------------------------------------------------------------===//

multiclass avx512_vptest<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _,
                         string Name> {
  // NOTE: Patterns are omitted in favor of manual selection in X86ISelDAGToDAG.
  // There are just too many permutations due to commutability and bitcasts.
  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                   "$src2, $src1", "$src1, $src2",
                   (null_frag), (null_frag), 1>,
                   EVEX_4V, Sched<[sched]>;
  let mayLoad = 1 in
  defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                   "$src2, $src1", "$src1, $src2",
                   (null_frag), (null_frag)>,
                   EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
  defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                    "${src2}"##_.BroadcastStr##", $src1",
                    "$src1, ${src2}"##_.BroadcastStr,
                    (null_frag), (null_frag)>,
                    EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr,
                                  X86SchedWriteWidths sched,
                                  AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_vptest<opc, OpcodeStr, sched.ZMM, _.info512, NAME>,
           avx512_vptest_mb<opc, OpcodeStr, sched.ZMM, _.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
  defm Z256 : avx512_vptest<opc, OpcodeStr, sched.YMM, _.info256, NAME>,
              avx512_vptest_mb<opc, OpcodeStr, sched.YMM, _.info256>, EVEX_V256;
  defm Z128 : avx512_vptest<opc, OpcodeStr, sched.XMM, _.info128, NAME>,
              avx512_vptest_mb<opc, OpcodeStr, sched.XMM, _.info128>, EVEX_V128;
  }
}

multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr,
                            X86SchedWriteWidths sched> {
  defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", sched,
                                  avx512vl_i32_info>;
  defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", sched,
                                  avx512vl_i64_info>, VEX_W;
}

multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
                            X86SchedWriteWidths sched> {
  let Predicates = [HasBWI] in {
  defm WZ: avx512_vptest<opc, OpcodeStr#"w", sched.ZMM,
                         v32i16_info, NAME#"W">, EVEX_V512, VEX_W;
  defm BZ: avx512_vptest<opc, OpcodeStr#"b", sched.ZMM,
                         v64i8_info, NAME#"B">, EVEX_V512;
  }
  let Predicates = [HasVLX, HasBWI] in {

  defm WZ256: avx512_vptest<opc, OpcodeStr#"w", sched.YMM,
                            v16i16x_info, NAME#"W">, EVEX_V256, VEX_W;
  defm WZ128: avx512_vptest<opc, OpcodeStr#"w", sched.XMM,
                            v8i16x_info, NAME#"W">, EVEX_V128, VEX_W;
  defm BZ256: avx512_vptest<opc, OpcodeStr#"b", sched.YMM,
                            v32i8x_info, NAME#"B">, EVEX_V256;
  defm BZ128: avx512_vptest<opc, OpcodeStr#"b", sched.XMM,
                            v16i8x_info, NAME#"B">, EVEX_V128;
  }
}

multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr,
                                   X86SchedWriteWidths sched> :
  avx512_vptest_wb<opc_wb, OpcodeStr, sched>,
  avx512_vptest_dq<opc_dq, OpcodeStr, sched>;

defm VPTESTM  : avx512_vptest_all_forms<0x26, 0x27, "vptestm",
                                        SchedWriteVecLogic>, T8PD;
defm VPTESTNM : avx512_vptest_all_forms<0x26, 0x27, "vptestnm",
                                        SchedWriteVecLogic>, T8XS;
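
// Illustrative semantics for the instructions above: vptestmd sets bit i of
// the result mask to ((src1[i] & src2[i]) != 0), while vptestnmd sets it to
// ((src1[i] & src2[i]) == 0). X86ISelDAGToDAG builds these directly from the
// corresponding AND-plus-compare DAGs.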

//===----------------------------------------------------------------------===//
// AVX-512 Shift instructions
//===----------------------------------------------------------------------===//

multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
                            string OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, u8imm:$src2), OpcodeStr,
                   "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1, (i8 timm:$src2)))>,
                   Sched<[sched]>;
  defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
                   (ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
                   "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode (_.VT (_.LdFrag addr:$src1)),
                          (i8 timm:$src2)))>,
                   Sched<[sched.Folded]>;
  }
}

multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
                             string OpcodeStr, SDNode OpNode,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
                    (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
                    "$src2, ${src1}"##_.BroadcastStr, "${src1}"##_.BroadcastStr##", $src2",
                    (_.VT (OpNode (_.BroadcastLdFrag addr:$src1), (i8 timm:$src2)))>,
                    EVEX_B, Sched<[sched.Folded]>;
}

multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, ValueType SrcVT,
                            X86VectorVTInfo _> {
  // src2 is always 128-bit
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, VR128X:$src2), OpcodeStr,
                   "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2)))>,
                   AVX512BIBase, EVEX_4V, Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, i128mem:$src2), OpcodeStr,
                   "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1, (SrcVT (load addr:$src2))))>,
                   AVX512BIBase,
                   EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86SchedWriteWidths sched, ValueType SrcVT,
                              AVX512VLVectorVTInfo VTInfo,
                              Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.ZMM, SrcVT,
                            VTInfo.info512>, EVEX_V512,
                            EVEX_CD8<VTInfo.info512.EltSize, CD8VQ>;
  let Predicates = [prd, HasVLX] in {
  defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.YMM, SrcVT,
                               VTInfo.info256>, EVEX_V256,
                               EVEX_CD8<VTInfo.info256.EltSize, CD8VH>;
  defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.XMM, SrcVT,
                               VTInfo.info128>, EVEX_V128,
                               EVEX_CD8<VTInfo.info128.EltSize, CD8VF>;
  }
}

multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
                              string OpcodeStr, SDNode OpNode,
                              X86SchedWriteWidths sched,
                              bit NotEVEX2VEXConvertibleQ = 0> {
  defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, sched, v4i32,
                              avx512vl_i32_info, HasAVX512>;
  let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
  defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, sched, v2i64,
                              avx512vl_i64_info, HasAVX512>, VEX_W;
  defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, sched, v8i16,
                              avx512vl_i16_info, HasBWI>;
}

multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
                                  string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched,
                                  AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasAVX512] in
  defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                           sched.ZMM, VTInfo.info512>,
          avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.ZMM,
                            VTInfo.info512>, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in {
  defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              sched.YMM, VTInfo.info256>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.YMM,
                               VTInfo.info256>, EVEX_V256;
  defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              sched.XMM, VTInfo.info128>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.XMM,
                               VTInfo.info128>, EVEX_V128;
  }
}

multiclass avx512_shift_rmi_w<bits<8> opcw, Format ImmFormR, Format ImmFormM,
                              string OpcodeStr, SDNode OpNode,
                              X86SchedWriteWidths sched> {
  let Predicates = [HasBWI] in
  defm WZ: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                            sched.ZMM, v32i16_info>, EVEX_V512, VEX_WIG;
  let Predicates = [HasVLX, HasBWI] in {
  defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                               sched.YMM, v16i16x_info>, EVEX_V256, VEX_WIG;
  defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                               sched.XMM, v8i16x_info>, EVEX_V128, VEX_WIG;
  }
}

multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq,
                               Format ImmFormR, Format ImmFormM,
                               string OpcodeStr, SDNode OpNode,
                               X86SchedWriteWidths sched,
                               bit NotEVEX2VEXConvertibleQ = 0> {
  defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode,
                                 sched, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
  let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
  defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode,
                                 sched, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
}

defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli,
                                 SchedWriteVecShiftImm>,
             avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli,
                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;

defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli,
                                 SchedWriteVecShiftImm>,
             avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli,
                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;

defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai,
                                 SchedWriteVecShiftImm, 1>,
             avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai,
                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;

defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri,
                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli,
                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;

defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl,
                                SchedWriteVecShift>;
defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra,
                                SchedWriteVecShift, 1>;
defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl,
                                SchedWriteVecShift>;

// Use the 512-bit VPSRA/VPSRAI versions to implement v2i64/v4i64 when VLX is
// not available.
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                 VR128X:$src2)), sub_ymm)>;

  def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                 VR128X:$src2)), sub_xmm)>;

  def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                 timm:$src2)), sub_ymm)>;

  def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                 timm:$src2)), sub_xmm)>;
}
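
// The lowering above widens the operation rather than scalarizing it, e.g.
// (illustrative) a v2i64 arithmetic shift right becomes: insert the xmm value
// into an undef zmm (INSERT_SUBREG of IMPLICIT_DEF), perform VPSRAQZrr on all
// eight lanes, then extract sub_xmm. The garbage computed in the upper lanes
// is never observed.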

//===-------------------------------------------------------------------===//
// Variable Bit Shifts
//===-------------------------------------------------------------------===//

multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                   "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2)))>,
                   AVX5128IBase, EVEX_4V, Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                   "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1,
                          (_.VT (_.LdFrag addr:$src2))))>,
                   AVX5128IBase, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                    "${src2}"##_.BroadcastStr##", $src1",
                    "$src1, ${src2}"##_.BroadcastStr,
                    (_.VT (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))>,
                    AVX5128IBase, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
           avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
  defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256;
  defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128;
  }
}

multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr,
                                  SDNode OpNode, X86SchedWriteWidths sched> {
  defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode, sched,
                                  avx512vl_i32_info>;
  defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode, sched,
                                  avx512vl_i64_info>, VEX_W;
}

// Use the 512-bit version to implement the 128/256-bit forms when VLX is not
// available.
multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr,
                                     SDNode OpNode, list<Predicate> p> {
  let Predicates = p in {
  def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1),
                                  (_.info256.VT _.info256.RC:$src2))),
            (EXTRACT_SUBREG
             (!cast<Instruction>(OpcodeStr#"Zrr")
              (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
              (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
             sub_ymm)>;

  def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1),
                                  (_.info128.VT _.info128.RC:$src2))),
            (EXTRACT_SUBREG
             (!cast<Instruction>(OpcodeStr#"Zrr")
              (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
              (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
             sub_xmm)>;
  }
}
multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
                              SDNode OpNode, X86SchedWriteWidths sched> {
  let Predicates = [HasBWI] in
  defm WZ: avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v32i16_info>,
           EVEX_V512, VEX_W;
  let Predicates = [HasVLX, HasBWI] in {

  defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v16i16x_info>,
              EVEX_V256, VEX_W;
  defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v8i16x_info>,
              EVEX_V128, VEX_W;
  }
}

defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", X86vshlv, SchedWriteVarVecShift>,
              avx512_var_shift_w<0x12, "vpsllvw", X86vshlv, SchedWriteVarVecShift>;

defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", X86vsrav, SchedWriteVarVecShift>,
              avx512_var_shift_w<0x11, "vpsravw", X86vsrav, SchedWriteVarVecShift>;

defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", X86vsrlv, SchedWriteVarVecShift>,
              avx512_var_shift_w<0x10, "vpsrlvw", X86vsrlv, SchedWriteVarVecShift>;

defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SchedWriteVarVecShift>;
defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SchedWriteVarVecShift>;

defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", X86vsrav, [HasAVX512, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", X86vshlv, [HasBWI, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", X86vsrav, [HasBWI, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", X86vsrlv, [HasBWI, NoVLX]>;

// Use the 512-bit VPROL/VPROLI versions to implement v2i64/v4i64 and
// v4i32/v8i32 rotates when VLX is not available.
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
              sub_xmm)>;
  def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
              sub_ymm)>;

  def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPROLVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
              sub_xmm)>;
  def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPROLVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
              sub_ymm)>;

  def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                timm:$src2)), sub_xmm)>;
  def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                timm:$src2)), sub_ymm)>;

  def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPROLDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                timm:$src2)), sub_xmm)>;
  def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPROLDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                timm:$src2)), sub_ymm)>;
}

// Use the 512-bit VPROR/VPRORI versions to implement v2i64/v4i64 and
// v4i32/v8i32 rotates when VLX is not available.
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPRORVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
              sub_xmm)>;
  def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPRORVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
              sub_ymm)>;

  def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPRORVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
              sub_xmm)>;
  def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPRORVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
              sub_ymm)>;

  def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPRORQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                timm:$src2)), sub_xmm)>;
  def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPRORQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                timm:$src2)), sub_ymm)>;

  def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPRORDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                timm:$src2)), sub_xmm)>;
  def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPRORDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                timm:$src2)), sub_ymm)>;
}

//===-------------------------------------------------------------------===//
// 1-src variable permutation VPERMW/D/Q
//===-------------------------------------------------------------------===//

multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
           avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in
  defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info256>, EVEX_V256;
}

multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
                                  string OpcodeStr, SDNode OpNode,
                                  X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasAVX512] in
  defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                           sched, VTInfo.info512>,
          avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
                            sched, VTInfo.info512>, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in
  defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              sched, VTInfo.info256>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
                               sched, VTInfo.info256>, EVEX_V256;
}
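
// Note: avx512_vperm_dq_sizes and avx512_vpermi_dq_sizes define only Z and
// Z256 forms. There is no 128-bit VPERMD/VPERMQ (or VPERMPS/VPERMPD); these
// cross-lane permutes only exist at 256 bits and wider.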

multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr,
                           Predicate prd, SDNode OpNode,
                           X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
  let Predicates = [prd] in
  defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
             EVEX_V512;
  let Predicates = [HasVLX, prd] in {
  defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
             EVEX_V256;
  defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info128>,
             EVEX_V128;
  }
}

defm VPERMW : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv,
                              WriteVarShuffle256, avx512vl_i16_info>, VEX_W;
defm VPERMB : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv,
                              WriteVarShuffle256, avx512vl_i8_info>;

defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv,
                                    WriteVarShuffle256, avx512vl_i32_info>;
defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv,
                                    WriteVarShuffle256, avx512vl_i64_info>, VEX_W;
defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv,
                                     WriteFVarShuffle256, avx512vl_f32_info>;
defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv,
                                     WriteFVarShuffle256, avx512vl_f64_info>, VEX_W;

defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq",
                                     X86VPermi, WriteShuffle256, avx512vl_i64_info>,
                                     EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd",
                                      X86VPermi, WriteFShuffle256, avx512vl_f64_info>,
                                      EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;

//===----------------------------------------------------------------------===//
// AVX-512 - VPERMIL
//===----------------------------------------------------------------------===//

multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _,
                             X86VectorVTInfo Ctrl> {
  defm rr: AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, Ctrl.RC:$src2), OpcodeStr,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1,
                                (Ctrl.VT Ctrl.RC:$src2)))>,
                  T8PD, EVEX_4V, Sched<[sched]>;
  defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode
                         _.RC:$src1,
                         (Ctrl.VT (Ctrl.LdFrag addr:$src2))))>,
                  T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                   "${src2}"##_.BroadcastStr##", $src1",
                   "$src1, ${src2}"##_.BroadcastStr,
                   (_.VT (OpNode
                          _.RC:$src1,
                          (Ctrl.VT (Ctrl.BroadcastLdFrag addr:$src2))))>,
                   T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
                                    X86SchedWriteWidths sched,
                                    AVX512VLVectorVTInfo _,
                                    AVX512VLVectorVTInfo Ctrl> {
  let Predicates = [HasAVX512] in {
  defm Z    : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.ZMM,
                                _.info512, Ctrl.info512>, EVEX_V512;
  }
  let Predicates = [HasAVX512, HasVLX] in {
  defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.XMM,
                                _.info128, Ctrl.info128>, EVEX_V128;
  defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.YMM,
                                _.info256, Ctrl.info256>, EVEX_V256;
  }
}

multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
                         AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl> {
  defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, SchedWriteFVarShuffle,
                                      _, Ctrl>;
  defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
                                    X86VPermilpi, SchedWriteFShuffle, _>,
                                    EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
}

let ExeDomain = SSEPackedSingle in
defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info,
                               avx512vl_i32_info>;
let ExeDomain = SSEPackedDouble in
defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
                               avx512vl_i64_info>, VEX_W1X;

//===----------------------------------------------------------------------===//
// AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW
//===----------------------------------------------------------------------===//

defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
                                      X86PShufd, SchedWriteShuffle, avx512vl_i32_info>,
                                      EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;
defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
                                  X86PShufhw, SchedWriteShuffle>,
                                  EVEX, AVX512XSIi8Base;
defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
                                  X86PShuflw, SchedWriteShuffle>,
                                  EVEX, AVX512XDIi8Base;

//===----------------------------------------------------------------------===//
// AVX-512 - VPSHUFB
//===----------------------------------------------------------------------===//

multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86SchedWriteWidths sched> {
  let Predicates = [HasBWI] in
  defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v64i8_info>,
             EVEX_V512;

  let Predicates = [HasVLX, HasBWI] in {
  defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v32i8x_info>,
             EVEX_V256;
  defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v16i8x_info>,
             EVEX_V128;
  }
}

defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb,
                                  SchedWriteVarShuffle>, VEX_WIG;

//===----------------------------------------------------------------------===//
// Move Low to High and High to Low packed FP Instructions
//===----------------------------------------------------------------------===//

def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
                            (ins VR128X:$src1, VR128X:$src2),
                            "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                            [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))]>,
                            Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V;
let isCommutable = 1 in
def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
                            (ins VR128X:$src1, VR128X:$src2),
                            "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                            [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))]>,
                            Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V, NotMemoryFoldable;
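
// Illustrative semantics: vmovlhps keeps the low 64 bits of $src1 and copies
// the low 64 bits of $src2 into the high half of the result; vmovhlps keeps
// the high 64 bits of $src1 and copies the high 64 bits of $src2 into the low
// half of the result.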
6321//===----------------------------------------------------------------------===// 6322 6323multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr, 6324 SDPatternOperator OpNode, 6325 X86VectorVTInfo _> { 6326 let hasSideEffects = 0, mayLoad = 1, ExeDomain = _.ExeDomain in 6327 def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst), 6328 (ins _.RC:$src1, f64mem:$src2), 6329 !strconcat(OpcodeStr, 6330 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 6331 [(set _.RC:$dst, 6332 (OpNode _.RC:$src1, 6333 (_.VT (bitconvert 6334 (v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))]>, 6335 Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>, EVEX_4V; 6336} 6337 6338// No patterns for MOVLPS/MOVHPS as the Movlhps node should only be created in 6339// SSE1. And MOVLPS pattern is even more complex. 6340defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", null_frag, 6341 v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS; 6342defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl, 6343 v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W; 6344defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", null_frag, 6345 v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS; 6346defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movsd, 6347 v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W; 6348 6349let Predicates = [HasAVX512] in { 6350 // VMOVHPD patterns 6351 def : Pat<(v2f64 (X86Unpckl VR128X:$src1, 6352 (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))), 6353 (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>; 6354 def : Pat<(v2f64 (X86Unpckl VR128X:$src1, (X86vzload64 addr:$src2))), 6355 (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>; 6356 6357 // VMOVLPD patterns 6358 def : Pat<(v2f64 (X86Movsd VR128X:$src1, (X86vzload64 addr:$src2))), 6359 (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>; 6360} 6361 6362let SchedRW = [WriteFStore] in { 6363let mayStore = 1, hasSideEffects = 0 in 6364def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs), 6365 (ins f64mem:$dst, VR128X:$src), 6366 "vmovhps\t{$src, $dst|$dst, $src}", 6367 []>, EVEX, EVEX_CD8<32, CD8VT2>; 6368def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs), 6369 (ins f64mem:$dst, VR128X:$src), 6370 "vmovhpd\t{$src, $dst|$dst, $src}", 6371 [(store (f64 (extractelt 6372 (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)), 6373 (iPTR 0))), addr:$dst)]>, 6374 EVEX, EVEX_CD8<64, CD8VT1>, VEX_W; 6375let mayStore = 1, hasSideEffects = 0 in 6376def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs), 6377 (ins f64mem:$dst, VR128X:$src), 6378 "vmovlps\t{$src, $dst|$dst, $src}", 6379 []>, EVEX, EVEX_CD8<32, CD8VT2>; 6380def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs), 6381 (ins f64mem:$dst, VR128X:$src), 6382 "vmovlpd\t{$src, $dst|$dst, $src}", 6383 [(store (f64 (extractelt (v2f64 VR128X:$src), 6384 (iPTR 0))), addr:$dst)]>, 6385 EVEX, EVEX_CD8<64, CD8VT1>, VEX_W; 6386} // SchedRW 6387 6388let Predicates = [HasAVX512] in { 6389 // VMOVHPD patterns 6390 def : Pat<(store (f64 (extractelt 6391 (v2f64 (X86VPermilpi VR128X:$src, (i8 1))), 6392 (iPTR 0))), addr:$dst), 6393 (VMOVHPDZ128mr addr:$dst, VR128X:$src)>; 6394} 6395//===----------------------------------------------------------------------===// 6396// FMA - Fused Multiply Operations 6397// 6398 6399multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, 6400 X86FoldableSchedWrite sched, 6401 X86VectorVTInfo _, string Suff> { 6402 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in { 6403 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs 

multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86FoldableSchedWrite sched,
                               X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>,
          AVX512FMA3Base, Sched<[sched]>;

  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>,
          AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.ScalarMemOp:$src3),
           OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
           !strconcat("$src2, ${src3}", _.BroadcastStr),
           (OpNode _.RC:$src2,
                   _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src3))), 1, 0>,
           AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched,
                                 X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
  defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
           (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))), 1, 1>,
           AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
}

multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   SDNode OpNodeRnd, X86SchedWriteWidths sched,
                                   AVX512VLVectorVTInfo _, string Suff> {
  let Predicates = [HasAVX512] in {
  defm Z    : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.ZMM,
                                  _.info512, Suff>,
              avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
                                    _.info512, Suff>,
              EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, HasAVX512] in {
  defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.YMM,
                                  _.info256, Suff>,
              EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
  defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.XMM,
                                  _.info128, Suff>,
              EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}

multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDNode OpNodeRnd> {
  defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
                                    SchedWriteFMA, avx512vl_f32_info, "PS">;
  defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
                                    SchedWriteFMA, avx512vl_f64_info, "PD">,
                                    VEX_W;
}

defm VFMADD213    : avx512_fma3p_213_f<0xA8, "vfmadd213", X86Fmadd, X86FmaddRnd>;
defm VFMSUB213    : avx512_fma3p_213_f<0xAA, "vfmsub213", X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD213   : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB213   : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86Fnmsub, X86FnmsubRnd>;

multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86FoldableSchedWrite sched,
                               X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1,
          vselect, 1>, AVX512FMA3Base, Sched<[sched]>;

  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>,
          AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.ScalarMemOp:$src3),
           OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
           "$src2, ${src3}"##_.BroadcastStr,
           (_.VT (OpNode _.RC:$src2,
                         (_.VT (_.BroadcastLdFrag addr:$src3)),
                         _.RC:$src1)), 1, 0>, AVX512FMA3Base, EVEX_B,
           Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched,
                                 X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
  defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
           (_.VT ( OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc))),
           1, 1, vselect, 1>,
           AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
}

multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   SDNode OpNodeRnd, X86SchedWriteWidths sched,
                                   AVX512VLVectorVTInfo _, string Suff> {
  let Predicates = [HasAVX512] in {
  defm Z    : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.ZMM,
                                  _.info512, Suff>,
              avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
                                    _.info512, Suff>,
              EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, HasAVX512] in {
  defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.YMM,
                                  _.info256, Suff>,
              EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
  defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.XMM,
                                  _.info128, Suff>,
              EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}

multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDNode OpNodeRnd > {
  defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
                                    SchedWriteFMA, avx512vl_f32_info, "PS">;
  defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
                                    SchedWriteFMA, avx512vl_f64_info, "PD">,
                                    VEX_W;
}

defm VFMADD231    : avx512_fma3p_231_f<0xB8, "vfmadd231", X86Fmadd, X86FmaddRnd>;
defm VFMSUB231    : avx512_fma3p_231_f<0xBA, "vfmsub231", X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD231   : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB231   : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86Fnmsub, X86FnmsubRnd>;

multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86FoldableSchedWrite sched,
                               X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1, vselect, 1>,
          AVX512FMA3Base, Sched<[sched]>;

  // The pattern is in 312 order so that the load is in a different place from
  // the 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>,
          AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;

  // The pattern is in 312 order so that the load is in a different place from
  // the 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.ScalarMemOp:$src3),
           OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
           "$src2, ${src3}"##_.BroadcastStr,
           (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
                         _.RC:$src1, _.RC:$src2)), 1, 0>,
           AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched,
                                 X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
  defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
           (_.VT ( OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 timm:$rc))),
           1, 1, vselect, 1>,
           AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
}

multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   SDNode OpNodeRnd, X86SchedWriteWidths sched,
                                   AVX512VLVectorVTInfo _, string Suff> {
  let Predicates = [HasAVX512] in {
  defm Z    : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.ZMM,
                                  _.info512, Suff>,
              avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
                                    _.info512, Suff>,
              EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, HasAVX512] in {
  defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.YMM,
                                  _.info256, Suff>,
              EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
  defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.XMM,
                                  _.info128, Suff>,
              EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}

multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDNode OpNodeRnd > {
  defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
                                    SchedWriteFMA, avx512vl_f32_info, "PS">;
  defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
                                    SchedWriteFMA, avx512vl_f64_info, "PD">,
                                    VEX_W;
}

defm VFMADD132    : avx512_fma3p_132_f<0x98, "vfmadd132", X86Fmadd, X86FmaddRnd>;
defm VFMSUB132    : avx512_fma3p_132_f<0x9A, "vfmsub132", X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD132   : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB132   : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86Fnmsub, X86FnmsubRnd>;
// Scalar FMA
multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                               dag RHS_r, dag RHS_m, dag RHS_b, bit MaskOnlyReg> {
let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
  defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3), OpcodeStr,
          "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
          AVX512FMA3Base, Sched<[SchedWriteFMA.Scl]>;

  let mayLoad = 1 in
  defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
          "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
          AVX512FMA3Base, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>;

  defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (null_frag), 1, 1>,
          AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>;

  let isCodeGenOnly = 1, isCommutable = 1 in {
    def r : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
                        (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
                        !strconcat(OpcodeStr,
                                   "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                        !if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>;
    def m : AVX512FMA3S<opc, MRMSrcMem, (outs _.FRC:$dst),
                        (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
                        !strconcat(OpcodeStr,
                                   "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                        [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>;

    def rb : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3, AVX512RC:$rc),
                         !strconcat(OpcodeStr,
                                    "\t{$rc, $src3, $src2, $dst|$dst, $src2, $src3, $rc}"),
                         !if(MaskOnlyReg, [], [RHS_b])>, EVEX_B, EVEX_RC,
                         Sched<[SchedWriteFMA.Scl]>;
  }// isCodeGenOnly = 1
}// Constraints = "$src1 = $dst"
}

multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
                            string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd,
                            X86VectorVTInfo _, string SUFF> {
  let ExeDomain = _.ExeDomain in {
  defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix, _,
                // Operands for intrinsic are in 123 order to preserve passthru
                // semantics.
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
                                           _.FRC:$src3))),
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
                                           (_.ScalarLdFrag addr:$src3)))),
                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src1,
                                           _.FRC:$src3, (i32 timm:$rc)))), 0>;

  defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix, _,
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
                                           _.FRC:$src1))),
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
                                           (_.ScalarLdFrag addr:$src3), _.FRC:$src1))),
                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src3,
                                           _.FRC:$src1, (i32 timm:$rc)))), 1>;

  // One pattern is 312 order so that the load is in a different place from the
  // 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
  defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix, _,
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3,
                                           _.FRC:$src2))),
                (set _.FRC:$dst, (_.EltVT (OpNode (_.ScalarLdFrag addr:$src3),
                                           _.FRC:$src1, _.FRC:$src2))),
                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src1, _.FRC:$src3,
                                           _.FRC:$src2, (i32 timm:$rc)))), 1>;
  }
}

multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
                        string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd> {
  let Predicates = [HasAVX512] in {
    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
                                 OpNodeRnd, f32x_info, "SS">,
                                 EVEX_CD8<32, CD8VT1>, VEX_LIG;
    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
                                 OpNodeRnd, f64x_info, "SD">,
                                 EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
  }
}

defm VFMADD  : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", X86Fmadd, X86FmaddRnd>;
defm VFMSUB  : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86Fmsub, X86FmsubRnd>;
defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86Fnmsub, X86FnmsubRnd>;

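// Illustrative note: the scalar forms follow the same numbering scheme as the
// packed ones, e.g. "vfmadd213ss %xmm3, %xmm2, %xmm1" computes
// xmm1[31:0] = (xmm2[31:0] * xmm1[31:0]) + xmm3[31:0] and leaves
// xmm1[127:32] unchanged.
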
multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix,
                                      string Suffix, SDNode Move,
                                      X86VectorVTInfo _, PatLeaf ZeroFP> {
  let Predicates = [HasAVX512] in {
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op _.FRC:$src2,
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                    _.FRC:$src3))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zr_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op _.FRC:$src2, _.FRC:$src3,
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zr_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op _.FRC:$src2,
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                    (_.ScalarLdFrag addr:$src3)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zm_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                    (_.ScalarLdFrag addr:$src3), _.FRC:$src2))))),
              (!cast<I>(Prefix#"132"#Suffix#"Zm_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zm_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects VK1WM:$mask,
                 (Op _.FRC:$src2,
                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                     _.FRC:$src3),
                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zr_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects VK1WM:$mask,
                 (Op _.FRC:$src2,
                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                     (_.ScalarLdFrag addr:$src3)),
                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zm_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects VK1WM:$mask,
                 (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                     (_.ScalarLdFrag addr:$src3), _.FRC:$src2),
                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"132"#Suffix#"Zm_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects VK1WM:$mask,
                 (Op _.FRC:$src2, _.FRC:$src3,
                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zr_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects VK1WM:$mask,
                 (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zm_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects VK1WM:$mask,
                 (Op _.FRC:$src2,
                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                     _.FRC:$src3),
                 (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zr_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects VK1WM:$mask,
                 (Op _.FRC:$src2, _.FRC:$src3,
                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
                 (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zr_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects VK1WM:$mask,
                 (Op _.FRC:$src2,
                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                     (_.ScalarLdFrag addr:$src3)),
                 (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zm_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects VK1WM:$mask,
                 (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                     _.FRC:$src2, (_.ScalarLdFrag addr:$src3)),
                 (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"132"#Suffix#"Zm_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects VK1WM:$mask,
                 (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
                 (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zm_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    // Patterns with rounding mode.
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (RndOp _.FRC:$src2,
                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                       _.FRC:$src3, (i32 timm:$rc)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (RndOp _.FRC:$src2, _.FRC:$src3,
                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                       (i32 timm:$rc)))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects VK1WM:$mask,
                 (RndOp _.FRC:$src2,
                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                        _.FRC:$src3, (i32 timm:$rc)),
                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects VK1WM:$mask,
                 (RndOp _.FRC:$src2, _.FRC:$src3,
                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                        (i32 timm:$rc)),
                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects VK1WM:$mask,
                 (RndOp _.FRC:$src2,
                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                        _.FRC:$src3, (i32 timm:$rc)),
                 (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects VK1WM:$mask,
                 (RndOp _.FRC:$src2, _.FRC:$src3,
                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                        (i32 timm:$rc)),
                 (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
  }
}

defm : avx512_scalar_fma_patterns<X86Fmadd, X86FmaddRnd, "VFMADD", "SS",
                                  X86Movss, v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<X86Fmsub, X86FmsubRnd, "VFMSUB", "SS",
                                  X86Movss, v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SS",
                                  X86Movss, v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SS",
                                  X86Movss, v4f32x_info, fp32imm0>;

defm : avx512_scalar_fma_patterns<X86Fmadd, X86FmaddRnd, "VFMADD", "SD",
                                  X86Movsd, v2f64x_info, fp64imm0>;
defm : avx512_scalar_fma_patterns<X86Fmsub, X86FmsubRnd, "VFMSUB", "SD",
                                  X86Movsd, v2f64x_info, fp64imm0>;
defm : avx512_scalar_fma_patterns<X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SD",
                                  X86Movsd, v2f64x_info, fp64imm0>;
defm : avx512_scalar_fma_patterns<X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SD",
                                  X86Movsd, v2f64x_info, fp64imm0>;

//===----------------------------------------------------------------------===//
// AVX-512 Packed Multiply of Unsigned 52-bit Integers and Add the Low 52-bit IFMA
//===----------------------------------------------------------------------===//
let Constraints = "$src1 = $dst" in {
multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  // NOTE: The SDNode has the multiply operands first, with the add last.
  // This enables commuted load patterns to be autogenerated by tablegen.
  let ExeDomain = _.ExeDomain in {
  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
          AVX512FMA3Base, Sched<[sched]>;

  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
          AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.ScalarMemOp:$src3),
           OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
           !strconcat("$src2, ${src3}", _.BroadcastStr ),
           (OpNode _.RC:$src2,
                   (_.VT (_.BroadcastLdFrag addr:$src3)),
                   _.RC:$src1)>,
           AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
} // Constraints = "$src1 = $dst"

multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
  let Predicates = [HasIFMA] in {
    defm Z    : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
                EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, HasIFMA] in {
    defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
                EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
                EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}

defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l,
                                         SchedWriteVecIMul, avx512vl_i64_info>,
                                         VEX_W;
defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h,
                                         SchedWriteVecIMul, avx512vl_i64_info>,
                                         VEX_W;

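// Illustrative note (semantics per the Intel SDM): for each 64-bit lane,
// vpmadd52luq computes dst += low52(src2[51:0] * src3[51:0]), and
// vpmadd52huq adds the high 52 bits of the same 104-bit product, hence
// $src1 being wired up as the addend in the patterns above.
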
//===----------------------------------------------------------------------===//
// AVX-512 Scalar convert from signed integer to float/double
//===----------------------------------------------------------------------===//

multiclass avx512_vcvtsi<bits<8> opc, SDPatternOperator OpNode, X86FoldableSchedWrite sched,
                         RegisterClass SrcRC, X86VectorVTInfo DstVT,
                         X86MemOperand x86memop, PatFrag ld_frag, string asm,
                         string mem> {
  let hasSideEffects = 0, isCodeGenOnly = 1 in {
    def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
                (ins DstVT.FRC:$src1, SrcRC:$src),
                !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
                EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
    let mayLoad = 1 in
      def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
                  (ins DstVT.FRC:$src1, x86memop:$src),
                  asm#"{"#mem#"}\t{$src, $src1, $dst|$dst, $src1, $src}", []>,
                  EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  } // hasSideEffects = 0
  def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
                  (ins DstVT.RC:$src1, SrcRC:$src2),
                  !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set DstVT.RC:$dst,
                        (OpNode (DstVT.VT DstVT.RC:$src1), SrcRC:$src2))]>,
                  EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;

  def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
                  (ins DstVT.RC:$src1, x86memop:$src2),
                  asm#"{"#mem#"}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  [(set DstVT.RC:$dst,
                        (OpNode (DstVT.VT DstVT.RC:$src1),
                                (ld_frag addr:$src2)))]>,
                  EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  def : InstAlias<"v"#asm#mem#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  (!cast<Instruction>(NAME#"rr_Int") DstVT.RC:$dst,
                   DstVT.RC:$src1, SrcRC:$src2), 0, "att">;
}

multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode,
                               X86FoldableSchedWrite sched, RegisterClass SrcRC,
                               X86VectorVTInfo DstVT, string asm,
                               string mem> {
  def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
                   (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
                   !strconcat(asm,
                              "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"),
                   [(set DstVT.RC:$dst,
                         (OpNode (DstVT.VT DstVT.RC:$src1),
                                 SrcRC:$src2,
                                 (i32 timm:$rc)))]>,
                   EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
  def : InstAlias<"v"#asm#mem#"\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}",
                  (!cast<Instruction>(NAME#"rrb_Int") DstVT.RC:$dst,
                   DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc), 0, "att">;
}

multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode, SDNode OpNodeRnd,
                                X86FoldableSchedWrite sched,
                                RegisterClass SrcRC, X86VectorVTInfo DstVT,
                                X86MemOperand x86memop, PatFrag ld_frag,
                                string asm, string mem> {
  defm NAME : avx512_vcvtsi_round<opc, OpNodeRnd, sched, SrcRC, DstVT, asm, mem>,
              avx512_vcvtsi<opc, OpNode, sched, SrcRC, DstVT, x86memop,
                            ld_frag, asm, mem>, VEX_LIG;
}

let Predicates = [HasAVX512] in {
defm VCVTSI2SSZ  : avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
                                        WriteCvtI2SS, GR32,
                                        v4f32x_info, i32mem, loadi32, "cvtsi2ss", "l">,
                                        XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
                                        WriteCvtI2SS, GR64,
                                        v4f32x_info, i64mem, loadi64, "cvtsi2ss", "q">,
                                        XS, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTSI2SDZ  : avx512_vcvtsi<0x2A, null_frag, WriteCvtI2SD, GR32,
                                 v2f64x_info, i32mem, loadi32, "cvtsi2sd", "l">,
                                 XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
                                        WriteCvtI2SD, GR64,
                                        v2f64x_info, i64mem, loadi64, "cvtsi2sd", "q">,
                                        XD, VEX_W, EVEX_CD8<64, CD8VT1>;

def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
                (VCVTSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
                (VCVTSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;

def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
          (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
          (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))),
          (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))),
          (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;

def : Pat<(f32 (sint_to_fp GR32:$src)),
          (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f32 (sint_to_fp GR64:$src)),
          (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
def : Pat<(f64 (sint_to_fp GR32:$src)),
          (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f64 (sint_to_fp GR64:$src)),
          (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;

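// Note on the patterns above: $src1 of the rr/rm forms only provides the
// untouched upper vector elements, which are don't-care for plain scalar
// codegen, so an IMPLICIT_DEF is passed. Later passes may still reassign the
// undef input to break the false output dependency these conversions carry.
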
defm VCVTUSI2SSZ   : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
                                          WriteCvtI2SS, GR32,
                                          v4f32x_info, i32mem, loadi32,
                                          "cvtusi2ss", "l">, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
                                          WriteCvtI2SS, GR64,
                                          v4f32x_info, i64mem, loadi64, "cvtusi2ss", "q">,
                                          XS, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTUSI2SDZ   : avx512_vcvtsi<0x7B, null_frag, WriteCvtI2SD, GR32, v2f64x_info,
                                   i32mem, loadi32, "cvtusi2sd", "l">,
                                   XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
                                          WriteCvtI2SD, GR64,
                                          v2f64x_info, i64mem, loadi64, "cvtusi2sd", "q">,
                                          XD, VEX_W, EVEX_CD8<64, CD8VT1>;

def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
                (VCVTUSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
                (VCVTUSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;

def : Pat<(f32 (uint_to_fp (loadi32 addr:$src))),
          (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f32 (uint_to_fp (loadi64 addr:$src))),
          (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (uint_to_fp (loadi32 addr:$src))),
          (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (uint_to_fp (loadi64 addr:$src))),
          (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;

def : Pat<(f32 (uint_to_fp GR32:$src)),
          (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f32 (uint_to_fp GR64:$src)),
          (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
def : Pat<(f64 (uint_to_fp GR32:$src)),
          (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f64 (uint_to_fp GR64:$src)),
          (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
}

//===----------------------------------------------------------------------===//
// AVX-512 Scalar convert from float/double to integer
//===----------------------------------------------------------------------===//

multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
                                  X86VectorVTInfo DstVT, SDNode OpNode,
                                  SDNode OpNodeRnd,
                                  X86FoldableSchedWrite sched, string asm,
                                  string aliasStr> {
  let Predicates = [HasAVX512] in {
    def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
                    !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                    [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src)))]>,
                    EVEX, VEX_LIG, Sched<[sched]>;
    def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc),
                     !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
                     [(set DstVT.RC:$dst, (OpNodeRnd (SrcVT.VT SrcVT.RC:$src), (i32 timm:$rc)))]>,
                     EVEX, VEX_LIG, EVEX_B, EVEX_RC,
                     Sched<[sched]>;
    def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src),
                    !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                    [(set DstVT.RC:$dst, (OpNode
                          (SrcVT.VT SrcVT.ScalarIntMemCPat:$src)))]>,
                    EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
  } // Predicates = [HasAVX512]

  def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
          (!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0, "att">;
  def : InstAlias<"v" # asm # aliasStr # "\t{$rc, $src, $dst|$dst, $src, $rc}",
          (!cast<Instruction>(NAME # "rrb_Int") DstVT.RC:$dst, SrcVT.RC:$src, AVX512RC:$rc), 0, "att">;
  def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
          (!cast<Instruction>(NAME # "rm_Int") DstVT.RC:$dst,
           SrcVT.IntScalarMemOp:$src), 0, "att">;
}

// Convert float/double to signed/unsigned int 32/64
defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info, X86cvts2si,
                                   X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{l}">,
                                   XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info, X86cvts2si,
                                   X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{q}">,
                                   XS, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2USIZ: avx512_cvt_s_int_round<0x79, f32x_info, i32x_info, X86cvts2usi,
                                   X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{l}">,
                                   XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, f32x_info, i64x_info, X86cvts2usi,
                                   X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{q}">,
                                   XS, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info, X86cvts2si,
                                   X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{l}">,
                                   XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info, X86cvts2si,
                                   X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{q}">,
                                   XD, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2USIZ: avx512_cvt_s_int_round<0x79, f64x_info, i32x_info, X86cvts2usi,
                                   X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{l}">,
                                   XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info, X86cvts2usi,
                                   X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{q}">,
                                   XD, VEX_W, EVEX_CD8<64, CD8VT1>;

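// Illustrative note: the "{l}"/"{q}" aliasStr arguments above become optional
// AT&T size suffixes, so e.g. both "vcvtss2si %xmm0, %eax" and
// "vcvtss2sil %xmm0, %eax" are accepted for the 32-bit form.
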
// Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang,
// which produce unnecessary vmovs{s,d} instructions.
let Predicates = [HasAVX512] in {
def : Pat<(v4f32 (X86Movss
                  (v4f32 VR128X:$dst),
                  (v4f32 (scalar_to_vector (f32 (sint_to_fp GR64:$src)))))),
          (VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v4f32 (X86Movss
                  (v4f32 VR128X:$dst),
                  (v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi64 addr:$src))))))),
          (VCVTSI642SSZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v4f32 (X86Movss
                  (v4f32 VR128X:$dst),
                  (v4f32 (scalar_to_vector (f32 (sint_to_fp GR32:$src)))))),
          (VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v4f32 (X86Movss
                  (v4f32 VR128X:$dst),
                  (v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi32 addr:$src))))))),
          (VCVTSI2SSZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                  (v2f64 VR128X:$dst),
                  (v2f64 (scalar_to_vector (f64 (sint_to_fp GR64:$src)))))),
          (VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v2f64 (X86Movsd
                  (v2f64 VR128X:$dst),
                  (v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi64 addr:$src))))))),
          (VCVTSI642SDZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                  (v2f64 VR128X:$dst),
                  (v2f64 (scalar_to_vector (f64 (sint_to_fp GR32:$src)))))),
          (VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v2f64 (X86Movsd
                  (v2f64 VR128X:$dst),
                  (v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi32 addr:$src))))))),
          (VCVTSI2SDZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v4f32 (X86Movss
                  (v4f32 VR128X:$dst),
                  (v4f32 (scalar_to_vector (f32 (uint_to_fp GR64:$src)))))),
          (VCVTUSI642SSZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v4f32 (X86Movss
                  (v4f32 VR128X:$dst),
                  (v4f32 (scalar_to_vector (f32 (uint_to_fp (loadi64 addr:$src))))))),
          (VCVTUSI642SSZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v4f32 (X86Movss
                  (v4f32 VR128X:$dst),
                  (v4f32 (scalar_to_vector (f32 (uint_to_fp GR32:$src)))))),
          (VCVTUSI2SSZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v4f32 (X86Movss
                  (v4f32 VR128X:$dst),
                  (v4f32 (scalar_to_vector (f32 (uint_to_fp (loadi32 addr:$src))))))),
          (VCVTUSI2SSZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                  (v2f64 VR128X:$dst),
                  (v2f64 (scalar_to_vector (f64 (uint_to_fp GR64:$src)))))),
          (VCVTUSI642SDZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v2f64 (X86Movsd
                  (v2f64 VR128X:$dst),
                  (v2f64 (scalar_to_vector (f64 (uint_to_fp (loadi64 addr:$src))))))),
          (VCVTUSI642SDZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                  (v2f64 VR128X:$dst),
                  (v2f64 (scalar_to_vector (f64 (uint_to_fp GR32:$src)))))),
          (VCVTUSI2SDZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v2f64 (X86Movsd
                  (v2f64 VR128X:$dst),
                  (v2f64 (scalar_to_vector (f64 (uint_to_fp (loadi32 addr:$src))))))),
          (VCVTUSI2SDZrm_Int VR128X:$dst, addr:$src)>;
} // Predicates = [HasAVX512]

// Convert float/double to signed/unsigned int 32/64 with truncation
multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
                            X86VectorVTInfo _DstRC, SDNode OpNode,
                            SDNode OpNodeInt, SDNode OpNodeSAE,
                            X86FoldableSchedWrite sched, string aliasStr>{
let Predicates = [HasAVX512] in {
  let isCodeGenOnly = 1 in {
  def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
                  !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                  [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>,
                  EVEX, VEX_LIG, Sched<[sched]>;
  def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
                  !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                  [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>,
                  EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }

  def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
                      !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                      [(set _DstRC.RC:$dst, (OpNodeInt (_SrcRC.VT _SrcRC.RC:$src)))]>,
                      EVEX, VEX_LIG, Sched<[sched]>;
  def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
                       !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
                       [(set _DstRC.RC:$dst, (OpNodeSAE (_SrcRC.VT _SrcRC.RC:$src)))]>,
                       EVEX, VEX_LIG, EVEX_B, Sched<[sched]>;
  def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
                      (ins _SrcRC.IntScalarMemOp:$src),
                      !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                      [(set _DstRC.RC:$dst,
                            (OpNodeInt (_SrcRC.VT _SrcRC.ScalarIntMemCPat:$src)))]>,
                      EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
} //HasAVX512

  def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
          (!cast<Instruction>(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
  def : InstAlias<asm # aliasStr # "\t{{sae}, $src, $dst|$dst, $src, {sae}}",
          (!cast<Instruction>(NAME # "rrb_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
  def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
          (!cast<Instruction>(NAME # "rm_Int") _DstRC.RC:$dst,
           _SrcRC.IntScalarMemOp:$src), 0, "att">;
}

defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
                        fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
                        "{l}">, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
                        fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
                        "{q}">, VEX_W, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
                        fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
                        "{l}">, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
                        fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
                        "{q}">, VEX_W, XD, EVEX_CD8<64, CD8VT1>;

defm VCVTTSS2USIZ: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i32x_info,
                        fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
                        "{l}">, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2USI64Z: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i64x_info,
                        fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
                        "{q}">, XS, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTTSD2USIZ: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i32x_info,
                        fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
                        "{l}">, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i64x_info,
                        fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
                        "{q}">, XD, VEX_W, EVEX_CD8<64, CD8VT1>;

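// Illustrative note: the vcvtt* forms always truncate toward zero, so they
// carry no rounding-control operand; the only override is {sae}, e.g.
// "vcvttss2si {sae}, %xmm0, %eax" converts without raising FP exceptions.
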
//===----------------------------------------------------------------------===//
// AVX-512 Convert from float to double and back
//===----------------------------------------------------------------------===//

multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                X86VectorVTInfo _Src, SDNode OpNode,
                                X86FoldableSchedWrite sched> {
  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (_.VT (OpNode (_.VT _.RC:$src1),
                                       (_Src.VT _Src.RC:$src2)))>,
                         EVEX_4V, VEX_LIG, Sched<[sched]>;
  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (_.VT (OpNode (_.VT _.RC:$src1),
                                       (_Src.VT _Src.ScalarIntMemCPat:$src2)))>,
                         EVEX_4V, VEX_LIG,
                         Sched<[sched.Folded, sched.ReadAfterFold]>;

  let isCodeGenOnly = 1, hasSideEffects = 0 in {
    def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
               (ins _.FRC:$src1, _Src.FRC:$src2),
               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
               EVEX_4V, VEX_LIG, Sched<[sched]>;
    let mayLoad = 1 in
    def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
               (ins _.FRC:$src1, _Src.ScalarMemOp:$src2),
               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
               EVEX_4V, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Scalar Conversion with SAE - suppress all exceptions
multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                    X86VectorVTInfo _Src, SDNode OpNodeSAE,
                                    X86FoldableSchedWrite sched> {
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
                         "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                         (_.VT (OpNodeSAE (_.VT _.RC:$src1),
                                          (_Src.VT _Src.RC:$src2)))>,
                         EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
}

// Scalar Conversion with rounding control (RC)
multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                   X86VectorVTInfo _Src, SDNode OpNodeRnd,
                                   X86FoldableSchedWrite sched> {
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
                         "$rc, $src2, $src1", "$src1, $src2, $rc",
                         (_.VT (OpNodeRnd (_.VT _.RC:$src1),
                                          (_Src.VT _Src.RC:$src2), (i32 timm:$rc)))>,
                         EVEX_4V, VEX_LIG, Sched<[sched]>,
                         EVEX_B, EVEX_RC;
}
multiclass avx512_cvt_fp_scalar_sd2ss<bits<8> opc, string OpcodeStr,
                                      SDNode OpNode, SDNode OpNodeRnd,
                                      X86FoldableSchedWrite sched,
                                      X86VectorVTInfo _src, X86VectorVTInfo _dst> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
             avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
                                     OpNodeRnd, sched>, VEX_W, EVEX_CD8<64, CD8VT1>, XD;
  }
}

multiclass avx512_cvt_fp_scalar_ss2sd<bits<8> opc, string OpcodeStr,
                                      SDNode OpNode, SDNode OpNodeSAE,
                                      X86FoldableSchedWrite sched,
                                      X86VectorVTInfo _src, X86VectorVTInfo _dst> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
             avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeSAE, sched>,
             EVEX_CD8<32, CD8VT1>, XS;
  }
}
defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss", X86frounds,
                                         X86froundsRnd, WriteCvtSD2SS, f64x_info,
                                         f32x_info>;
defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd", X86fpexts,
                                          X86fpextsSAE, WriteCvtSS2SD, f32x_info,
                                          f64x_info>;

def : Pat<(f64 (fpextend FR32X:$src)),
          (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>,
          Requires<[HasAVX512]>;
def : Pat<(f64 (fpextend (loadf32 addr:$src))),
          (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
          Requires<[HasAVX512, OptForSize]>;

def : Pat<(f32 (fpround FR64X:$src)),
          (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
          Requires<[HasAVX512]>;

def : Pat<(v4f32 (X86Movss
                  (v4f32 VR128X:$dst),
                  (v4f32 (scalar_to_vector
                          (f32 (fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
          (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
          Requires<[HasAVX512]>;

def : Pat<(v2f64 (X86Movsd
                  (v2f64 VR128X:$dst),
                  (v2f64 (scalar_to_vector
                          (f64 (fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
          (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
          Requires<[HasAVX512]>;

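// Illustrative note: the rounding-control form assembles as e.g.
// "vcvtsd2ss {rz-sae}, %xmm2, %xmm1, %xmm0" (round toward zero), while the
// ss2sd direction only gets an {sae} variant, since widening f32 to f64 is
// always exact and needs no rounding override.
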
//===----------------------------------------------------------------------===//
// AVX-512 Vector convert from signed/unsigned integer to float/double
// and from float/double to signed/unsigned integer
//===----------------------------------------------------------------------===//

multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                          X86VectorVTInfo _Src, SDNode OpNode,
                          X86FoldableSchedWrite sched,
                          string Broadcast = _.BroadcastStr,
                          string Alias = "", X86MemOperand MemOp = _Src.MemOp,
                          RegisterClass MaskRC = _.KRCWM,
                          dag LdDAG = (_.VT (OpNode (_Src.VT (_Src.LdFrag addr:$src))))> {

  defm rr : AVX512_maskable_common<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _Src.RC:$src),
                         (ins _.RC:$src0, MaskRC:$mask, _Src.RC:$src),
                         (ins MaskRC:$mask, _Src.RC:$src),
                         OpcodeStr, "$src", "$src",
                         (_.VT (OpNode (_Src.VT _Src.RC:$src))),
                         (vselect MaskRC:$mask,
                                  (_.VT (OpNode (_Src.VT _Src.RC:$src))),
                                  _.RC:$src0),
                         vselect, "$src0 = $dst">,
                         EVEX, Sched<[sched]>;

  defm rm : AVX512_maskable_common<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins MemOp:$src),
                         (ins _.RC:$src0, MaskRC:$mask, MemOp:$src),
                         (ins MaskRC:$mask, MemOp:$src),
                         OpcodeStr#Alias, "$src", "$src",
                         LdDAG,
                         (vselect MaskRC:$mask, LdDAG, _.RC:$src0),
                         vselect, "$src0 = $dst">,
                         EVEX, Sched<[sched.Folded]>;

  defm rmb : AVX512_maskable_common<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _Src.ScalarMemOp:$src),
                         (ins _.RC:$src0, MaskRC:$mask, _Src.ScalarMemOp:$src),
                         (ins MaskRC:$mask, _Src.ScalarMemOp:$src),
                         OpcodeStr,
                         "${src}"##Broadcast, "${src}"##Broadcast,
                         (_.VT (OpNode (_Src.VT
                                        (_Src.BroadcastLdFrag addr:$src))
                               )),
                         (vselect MaskRC:$mask,
                                  (_.VT
                                   (OpNode
                                    (_Src.VT
                                     (_Src.BroadcastLdFrag addr:$src)))),
                                  _.RC:$src0),
                         vselect, "$src0 = $dst">,
                         EVEX, EVEX_B, Sched<[sched.Folded]>;
}
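
// Illustrative note: the rmb form above is the embedded-broadcast encoding,
// e.g. "vcvtdq2ps (%rax){1to16}, %zmm0 {%k1} {z}" converts a single dword
// splat across all sixteen lanes under a zeroing write mask.
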
// Conversion with SAE - suppress all exceptions
multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                              X86VectorVTInfo _Src, SDNode OpNodeSAE,
                              X86FoldableSchedWrite sched> {
  defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _Src.RC:$src), OpcodeStr,
                        "{sae}, $src", "$src, {sae}",
                        (_.VT (OpNodeSAE (_Src.VT _Src.RC:$src)))>,
                        EVEX, EVEX_B, Sched<[sched]>;
}

// Conversion with rounding control (RC)
multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                             X86VectorVTInfo _Src, SDNode OpNodeRnd,
                             X86FoldableSchedWrite sched> {
  defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr,
                        "$rc, $src", "$src, $rc",
                        (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 timm:$rc)))>,
                        EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
}

// Similar to avx512_vcvt_fp, but uses an extload for the memory form.
multiclass avx512_vcvt_fpextend<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                X86VectorVTInfo _Src, SDNode OpNode,
                                X86FoldableSchedWrite sched,
                                string Broadcast = _.BroadcastStr,
                                string Alias = "", X86MemOperand MemOp = _Src.MemOp,
                                RegisterClass MaskRC = _.KRCWM>
  : avx512_vcvt_fp<opc, OpcodeStr, _, _Src, OpNode, sched, Broadcast, Alias,
                   MemOp, MaskRC,
                   (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src))>;

// Extend Float to Double
multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, v8f64_info, v8f32x_info,
                                  fpextend, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f32x_info,
                                X86vfpextSAE, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, v2f64x_info, v4f32x_info,
                                     X86vfpext, sched.XMM, "{1to2}", "", f64mem>, EVEX_V128;
    defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v4f32x_info, fpextend,
                                     sched.YMM>, EVEX_V256;
  }
}

// Truncate Double to Float
multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info, X86vfpround, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8f64_info,
                               X86vfproundRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2f64x_info,
                               null_frag, sched.XMM, "{1to2}", "{x}", f128mem, VK2WM>,
                               EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info, X86vfpround,
                               sched.YMM, "{1to4}", "{y}">, EVEX_V256;
  }

  def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;

  def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
}

defm VCVTPD2PS : avx512_cvtpd2ps<0x5A, "vcvtpd2ps", SchedWriteCvtPD2PS>,
                                 VEX_W, PD, EVEX_CD8<64, CD8VF>;
defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd", SchedWriteCvtPS2PD>,
                                 PS, EVEX_CD8<32, CD8VH>;

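// Illustrative note: the "x"/"y" suffixes accepted above let AT&T code state
// the memory width explicitly, e.g. "vcvtpd2psx (%rax), %xmm0" reads 128 bits
// of doubles while "vcvtpd2psy (%rax), %xmm0" reads 256 bits; both write an
// XMM result, so the plain mnemonic would otherwise be ambiguous.
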
${src}{1to4}}", 7611 (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, f64mem:$src), 0, "att">; 7612 def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}}|" 7613 "$dst {${mask}}, ${src}{1to4}}", 7614 (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst, 7615 VK4WM:$mask, f64mem:$src), 0, "att">; 7616 def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}} {z}|" 7617 "$dst {${mask}} {z}, ${src}{1to4}}", 7618 (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst, 7619 VK4WM:$mask, f64mem:$src), 0, "att">; 7620} 7621 7622defm VCVTPD2PS : avx512_cvtpd2ps<0x5A, "vcvtpd2ps", SchedWriteCvtPD2PS>, 7623 VEX_W, PD, EVEX_CD8<64, CD8VF>; 7624defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd", SchedWriteCvtPS2PD>, 7625 PS, EVEX_CD8<32, CD8VH>; 7626 7627let Predicates = [HasAVX512] in { 7628 def : Pat<(v8f32 (fpround (v8f64 VR512:$src))), 7629 (VCVTPD2PSZrr VR512:$src)>; 7630 def : Pat<(vselect VK8WM:$mask, (v8f32 (fpround (v8f64 VR512:$src))), 7631 VR256X:$src0), 7632 (VCVTPD2PSZrrk VR256X:$src0, VK8WM:$mask, VR512:$src)>; 7633 def : Pat<(vselect VK8WM:$mask, (v8f32 (fpround (v8f64 VR512:$src))), 7634 v8f32x_info.ImmAllZerosV), 7635 (VCVTPD2PSZrrkz VK8WM:$mask, VR512:$src)>; 7636 7637 def : Pat<(v8f32 (fpround (loadv8f64 addr:$src))), 7638 (VCVTPD2PSZrm addr:$src)>; 7639 def : Pat<(vselect VK8WM:$mask, (v8f32 (fpround (loadv8f64 addr:$src))), 7640 VR256X:$src0), 7641 (VCVTPD2PSZrmk VR256X:$src0, VK8WM:$mask, addr:$src)>; 7642 def : Pat<(vselect VK8WM:$mask, (v8f32 (fpround (loadv8f64 addr:$src))), 7643 v8f32x_info.ImmAllZerosV), 7644 (VCVTPD2PSZrmkz VK8WM:$mask, addr:$src)>; 7645 7646 def : Pat<(v8f32 (fpround (v8f64 (X86VBroadcastld64 addr:$src)))), 7647 (VCVTPD2PSZrmb addr:$src)>; 7648 def : Pat<(vselect VK8WM:$mask, 7649 (fpround (v8f64 (X86VBroadcastld64 addr:$src))), 7650 (v8f32 VR256X:$src0)), 7651 (VCVTPD2PSZrmbk VR256X:$src0, VK8WM:$mask, addr:$src)>; 7652 def : Pat<(vselect VK8WM:$mask, 7653 (fpround (v8f64 (X86VBroadcastld64 addr:$src))), 7654 v8f32x_info.ImmAllZerosV), 7655 (VCVTPD2PSZrmbkz VK8WM:$mask, addr:$src)>; 7656} 7657 7658let Predicates = [HasVLX] in { 7659 def : Pat<(v4f32 (fpround (v4f64 VR256X:$src))), 7660 (VCVTPD2PSZ256rr VR256X:$src)>; 7661 def : Pat<(vselect VK4WM:$mask, (v4f32 (fpround (v4f64 VR256X:$src))), 7662 VR128X:$src0), 7663 (VCVTPD2PSZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>; 7664 def : Pat<(vselect VK4WM:$mask, (v4f32 (fpround (v4f64 VR256X:$src))), 7665 v4f32x_info.ImmAllZerosV), 7666 (VCVTPD2PSZ256rrkz VK4WM:$mask, VR256X:$src)>; 7667 7668 def : Pat<(v4f32 (fpround (loadv4f64 addr:$src))), 7669 (VCVTPD2PSZ256rm addr:$src)>; 7670 def : Pat<(vselect VK4WM:$mask, (v4f32 (fpround (loadv4f64 addr:$src))), 7671 VR128X:$src0), 7672 (VCVTPD2PSZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>; 7673 def : Pat<(vselect VK4WM:$mask, (v4f32 (fpround (loadv4f64 addr:$src))), 7674 v4f32x_info.ImmAllZerosV), 7675 (VCVTPD2PSZ256rmkz VK4WM:$mask, addr:$src)>; 7676 7677 def : Pat<(v4f32 (fpround (v4f64 (X86VBroadcastld64 addr:$src)))), 7678 (VCVTPD2PSZ256rmb addr:$src)>; 7679 def : Pat<(vselect VK4WM:$mask, 7680 (v4f32 (fpround (v4f64 (X86VBroadcastld64 addr:$src)))), 7681 VR128X:$src0), 7682 (VCVTPD2PSZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>; 7683 def : Pat<(vselect VK4WM:$mask, 7684 (v4f32 (fpround (v4f64 (X86VBroadcastld64 addr:$src)))), 7685 v4f32x_info.ImmAllZerosV), 7686 (VCVTPD2PSZ256rmbkz VK4WM:$mask, addr:$src)>; 7687 7688 // Special patterns to allow use of X86vmfpround for masking. Instruction 7689 // patterns have been disabled with null_frag. 
// Convert Signed/Unsigned Doubleword to Double
multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNode128, X86SchedWriteWidths sched> {
  // No rounding in this op
  let Predicates = [HasAVX512] in
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode,
                            sched.ZMM>, EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
                               OpNode128, sched.XMM, "{1to2}", "", i64mem, VK2WM,
                               (v2f64 (OpNode128 (bc_v4i32
                                (v2i64
                                 (scalar_to_vector (loadi64 addr:$src))))))>,
                               EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}

// Convert Signed/Unsigned Doubleword to Float
multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode,
                               sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}

// Convert Float to Signed/Unsigned Doubleword with truncation
multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            SDNode OpNodeSAE, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
                                OpNodeSAE, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
                               sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}

// Convert Float to Signed/Unsigned Doubleword
multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
                               sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}

// Convert Double to Signed/Unsigned Doubleword with truncation
multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            SDNode OpNodeSAE, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
                                OpNodeSAE, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    // We need "x"/"y" suffixes in order to distinguish between 128 and 256
    // memory forms of these instructions in Asm Parser. They have the same
    // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
    // due to the same reason.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
                               null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
                               VK2WM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
                               sched.YMM, "{1to4}", "{y}">, EVEX_V256;
  }

  def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
                  VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
                  f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;

  def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
                  VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
                  f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
}

// Convert Double to Signed/Unsigned Doubleword
multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    // We need "x"/"y" suffixes in order to distinguish between 128 and 256
    // memory forms of these instructions in Asm Parser. They have the same
    // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
    // due to the same reason.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
                               null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
                               VK2WM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
                               sched.YMM, "{1to4}", "{y}">, EVEX_V256;
  }

  def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
                  f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;

  def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
                  f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
}

Quadword
multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
                               sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}

// Convert Double to Signed/Unsigned Quadword with truncation
multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
                                OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
                               sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}

// Convert Signed/Unsigned Quadword to Double
multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode,
                               sched.XMM>, EVEX_V128, NotEVEX2VEXConvertible;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode,
                               sched.YMM>, EVEX_V256, NotEVEX2VEXConvertible;
  }
}

// Convert Float to Signed/Unsigned Quadword
multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    // Explicitly specified broadcast string, since we take only 2 elements
    // from the v4f32x_info source. (See the note following this multiclass.)
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
                               sched.XMM, "{1to2}", "", f64mem, VK2WM,
                               (v2i64 (OpNode (bc_v4f32
                                               (v2f64
                                                (scalar_to_vector (loadf64 addr:$src))))))>,
                               EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}
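// Note (an informal sketch of the intent, inferred from the pattern above):
// the 128-bit form consumes only the low two floats, so its memory operand is
// 64 bits wide and embedded broadcast replicates a single f32 into both
// elements. In AT&T syntax:
//   vcvtps2qq (%rax), %xmm0          // reads 64 bits = 2 x f32
//   vcvtps2qq (%rax){1to2}, %xmm0    // broadcasts one f32 to both elements
// The same layout applies to the truncating variant defined below.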
// Convert Float to Signed/Unsigned Quadword with truncation
multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
                                OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    // Explicitly specified broadcast string, since we take only 2 elements
    // from the v4f32x_info source.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
                               sched.XMM, "{1to2}", "", f64mem, VK2WM,
                               (v2i64 (OpNode (bc_v4f32
                                               (v2f64
                                                (scalar_to_vector (loadf64 addr:$src))))))>,
                               EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}

// Convert Signed/Unsigned Quadword to Float
multiclass avx512_cvtqq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i64_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8i64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    // We need "x"/"y" suffixes in order to distinguish between the 128- and
    // 256-bit memory forms of these instructions in the Asm Parser: they have
    // the same dest type - 'v4f32x_info'. We also specify the broadcast string
    // explicitly for the same reason.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2i64x_info, null_frag,
                               sched.XMM, "{1to2}", "{x}", i128mem, VK2WM>,
                               EVEX_V128, NotEVEX2VEXConvertible;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i64x_info, OpNode,
                               sched.YMM, "{1to4}", "{y}">, EVEX_V256,
                               NotEVEX2VEXConvertible;
  }

  def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
                  VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
                  i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
                  VK2WM:$mask, i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                  VK2WM:$mask, i64mem:$src), 0, "att">;

  def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
                  VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}}|"
                  "$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
                  i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
                  VK4WM:$mask, i64mem:$src), 0, "att">;
8072 def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}} {z}|" 8073 "$dst {${mask}} {z}, ${src}{1to4}}", 8074 (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst, 8075 VK4WM:$mask, i64mem:$src), 0, "att">; 8076} 8077 8078defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", sint_to_fp, X86VSintToFP, 8079 SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>; 8080 8081defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", sint_to_fp, 8082 X86VSintToFpRnd, SchedWriteCvtDQ2PS>, 8083 PS, EVEX_CD8<32, CD8VF>; 8084 8085defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86cvttp2si, 8086 X86cvttp2siSAE, SchedWriteCvtPS2DQ>, 8087 XS, EVEX_CD8<32, CD8VF>; 8088 8089defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86cvttp2si, 8090 X86cvttp2siSAE, SchedWriteCvtPD2DQ>, 8091 PD, VEX_W, EVEX_CD8<64, CD8VF>; 8092 8093defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86cvttp2ui, 8094 X86cvttp2uiSAE, SchedWriteCvtPS2DQ>, PS, 8095 EVEX_CD8<32, CD8VF>; 8096 8097defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86cvttp2ui, 8098 X86cvttp2uiSAE, SchedWriteCvtPD2DQ>, 8099 PS, VEX_W, EVEX_CD8<64, CD8VF>; 8100 8101defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", uint_to_fp, 8102 X86VUintToFP, SchedWriteCvtDQ2PD>, XS, 8103 EVEX_CD8<32, CD8VH>; 8104 8105defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", uint_to_fp, 8106 X86VUintToFpRnd, SchedWriteCvtDQ2PS>, XD, 8107 EVEX_CD8<32, CD8VF>; 8108 8109defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int, 8110 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD, 8111 EVEX_CD8<32, CD8VF>; 8112 8113defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int, 8114 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, XD, 8115 VEX_W, EVEX_CD8<64, CD8VF>; 8116 8117defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt, 8118 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, 8119 PS, EVEX_CD8<32, CD8VF>; 8120 8121defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt, 8122 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W, 8123 PS, EVEX_CD8<64, CD8VF>; 8124 8125defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int, 8126 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, VEX_W, 8127 PD, EVEX_CD8<64, CD8VF>; 8128 8129defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int, 8130 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD, 8131 EVEX_CD8<32, CD8VH>; 8132 8133defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt, 8134 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W, 8135 PD, EVEX_CD8<64, CD8VF>; 8136 8137defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt, 8138 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, PD, 8139 EVEX_CD8<32, CD8VH>; 8140 8141defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86cvttp2si, 8142 X86cvttp2siSAE, SchedWriteCvtPD2DQ>, VEX_W, 8143 PD, EVEX_CD8<64, CD8VF>; 8144 8145defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86cvttp2si, 8146 X86cvttp2siSAE, SchedWriteCvtPS2DQ>, PD, 8147 EVEX_CD8<32, CD8VH>; 8148 8149defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86cvttp2ui, 8150 X86cvttp2uiSAE, SchedWriteCvtPD2DQ>, VEX_W, 8151 PD, EVEX_CD8<64, CD8VF>; 8152 8153defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86cvttp2ui, 8154 X86cvttp2uiSAE, SchedWriteCvtPS2DQ>, PD, 8155 EVEX_CD8<32, CD8VH>; 8156 8157defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", sint_to_fp, 8158 X86VSintToFpRnd, SchedWriteCvtDQ2PD>, VEX_W, XS, 8159 EVEX_CD8<64, CD8VF>; 8160 8161defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", uint_to_fp, 8162 X86VUintToFpRnd, SchedWriteCvtDQ2PD>, VEX_W, XS, 8163 EVEX_CD8<64, CD8VF>; 8164 
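// Naming note (informal, inferred from the definitions in this file): each
// defm above expands into the usual EVEX variants, e.g. VCVTPD2DQ yields
//   VCVTPD2DQZrr / Zrm / Zrmb       (512-bit register / memory / broadcast)
//   VCVTPD2DQZ256* and VCVTPD2DQZ128*   (VLX forms)
// each additionally in "k" (merge-masking) and "kz" (zero-masking) flavors.
// This is the naming the !cast<Instruction>(NAME # "...") lookups rely on.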
8165defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", sint_to_fp, 8166 X86VSintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, PS, 8167 EVEX_CD8<64, CD8VF>; 8168 8169defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", uint_to_fp, 8170 X86VUintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, XD, 8171 EVEX_CD8<64, CD8VF>; 8172 8173let Predicates = [HasVLX] in { 8174 // Special patterns to allow use of X86mcvtp2Int for masking. Instruction 8175 // patterns have been disabled with null_frag. 8176 def : Pat<(v4i32 (X86cvtp2Int (v2f64 VR128X:$src))), 8177 (VCVTPD2DQZ128rr VR128X:$src)>; 8178 def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), (v4i32 VR128X:$src0), 8179 VK2WM:$mask), 8180 (VCVTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>; 8181 def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV, 8182 VK2WM:$mask), 8183 (VCVTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>; 8184 8185 def : Pat<(v4i32 (X86cvtp2Int (loadv2f64 addr:$src))), 8186 (VCVTPD2DQZ128rm addr:$src)>; 8187 def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), (v4i32 VR128X:$src0), 8188 VK2WM:$mask), 8189 (VCVTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8190 def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV, 8191 VK2WM:$mask), 8192 (VCVTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>; 8193 8194 def : Pat<(v4i32 (X86cvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)))), 8195 (VCVTPD2DQZ128rmb addr:$src)>; 8196 def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)), 8197 (v4i32 VR128X:$src0), VK2WM:$mask), 8198 (VCVTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8199 def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)), 8200 v4i32x_info.ImmAllZerosV, VK2WM:$mask), 8201 (VCVTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>; 8202 8203 // Special patterns to allow use of X86mcvttp2si for masking. Instruction 8204 // patterns have been disabled with null_frag. 8205 def : Pat<(v4i32 (X86cvttp2si (v2f64 VR128X:$src))), 8206 (VCVTTPD2DQZ128rr VR128X:$src)>; 8207 def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), (v4i32 VR128X:$src0), 8208 VK2WM:$mask), 8209 (VCVTTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>; 8210 def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV, 8211 VK2WM:$mask), 8212 (VCVTTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>; 8213 8214 def : Pat<(v4i32 (X86cvttp2si (loadv2f64 addr:$src))), 8215 (VCVTTPD2DQZ128rm addr:$src)>; 8216 def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), (v4i32 VR128X:$src0), 8217 VK2WM:$mask), 8218 (VCVTTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8219 def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV, 8220 VK2WM:$mask), 8221 (VCVTTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>; 8222 8223 def : Pat<(v4i32 (X86cvttp2si (v2f64 (X86VBroadcastld64 addr:$src)))), 8224 (VCVTTPD2DQZ128rmb addr:$src)>; 8225 def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)), 8226 (v4i32 VR128X:$src0), VK2WM:$mask), 8227 (VCVTTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8228 def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)), 8229 v4i32x_info.ImmAllZerosV, VK2WM:$mask), 8230 (VCVTTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>; 8231 8232 // Special patterns to allow use of X86mcvtp2UInt for masking. Instruction 8233 // patterns have been disabled with null_frag. 
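// (Illustration of the mechanism, informal: the masked SDNode carries the
// passthru vector and the mask as explicit operands, so the patterns below
// map, for example,
//   (X86mcvtp2UInt $src, $src0, $mask) -> VCVTPD2UDQZ128rrk $src0, $mask, $src
// while an all-zeros passthru selects the "rrkz" zero-masking form.)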
  def : Pat<(v4i32 (X86cvtp2UInt (v2f64 VR128X:$src))),
            (VCVTPD2UDQZ128rr VR128X:$src)>;
  def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;

  def : Pat<(v4i32 (X86cvtp2UInt (loadv2f64 addr:$src))),
            (VCVTPD2UDQZ128rm addr:$src)>;
  def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v4i32 (X86cvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)))),
            (VCVTPD2UDQZ128rmb addr:$src)>;
  def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
                           (v4i32 VR128X:$src0), VK2WM:$mask),
            (VCVTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
                           v4i32x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;

  // Special patterns to allow use of X86mcvttp2ui for masking. Instruction
  // patterns have been disabled with null_frag.
  def : Pat<(v4i32 (X86cvttp2ui (v2f64 VR128X:$src))),
            (VCVTTPD2UDQZ128rr VR128X:$src)>;
  def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;

  def : Pat<(v4i32 (X86cvttp2ui (loadv2f64 addr:$src))),
            (VCVTTPD2UDQZ128rm addr:$src)>;
  def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v4i32 (X86cvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)))),
            (VCVTTPD2UDQZ128rmb addr:$src)>;
  def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
                          (v4i32 VR128X:$src0), VK2WM:$mask),
            (VCVTTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
}

let Predicates = [HasDQI, HasVLX] in {
  def : Pat<(v2i64 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
            (VCVTPS2QQZ128rm addr:$src)>;
  def : Pat<(v2i64 (vselect VK2WM:$mask,
                            (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                            VR128X:$src0)),
            (VCVTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(v2i64 (vselect VK2WM:$mask,
                            (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                            v2i64x_info.ImmAllZerosV)),
            (VCVTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v2i64 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
            (VCVTPS2UQQZ128rm addr:$src)>;
  def : Pat<(v2i64 (vselect VK2WM:$mask,
                            (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                            VR128X:$src0)),
            (VCVTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(v2i64 (vselect VK2WM:$mask, 8310 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), 8311 v2i64x_info.ImmAllZerosV)), 8312 (VCVTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>; 8313 8314 def : Pat<(v2i64 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))), 8315 (VCVTTPS2QQZ128rm addr:$src)>; 8316 def : Pat<(v2i64 (vselect VK2WM:$mask, 8317 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), 8318 VR128X:$src0)), 8319 (VCVTTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8320 def : Pat<(v2i64 (vselect VK2WM:$mask, 8321 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), 8322 v2i64x_info.ImmAllZerosV)), 8323 (VCVTTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>; 8324 8325 def : Pat<(v2i64 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))), 8326 (VCVTTPS2UQQZ128rm addr:$src)>; 8327 def : Pat<(v2i64 (vselect VK2WM:$mask, 8328 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), 8329 VR128X:$src0)), 8330 (VCVTTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8331 def : Pat<(v2i64 (vselect VK2WM:$mask, 8332 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), 8333 v2i64x_info.ImmAllZerosV)), 8334 (VCVTTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>; 8335} 8336 8337let Predicates = [HasAVX512, NoVLX] in { 8338def : Pat<(v8i32 (X86cvttp2ui (v8f32 VR256X:$src1))), 8339 (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr 8340 (v16f32 (INSERT_SUBREG (IMPLICIT_DEF), 8341 VR256X:$src1, sub_ymm)))), sub_ymm)>; 8342 8343def : Pat<(v4i32 (X86cvttp2ui (v4f32 VR128X:$src1))), 8344 (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr 8345 (v16f32 (INSERT_SUBREG (IMPLICIT_DEF), 8346 VR128X:$src1, sub_xmm)))), sub_xmm)>; 8347 8348def : Pat<(v4i32 (X86cvttp2ui (v4f64 VR256X:$src1))), 8349 (EXTRACT_SUBREG (v8i32 (VCVTTPD2UDQZrr 8350 (v8f64 (INSERT_SUBREG (IMPLICIT_DEF), 8351 VR256X:$src1, sub_ymm)))), sub_xmm)>; 8352 8353def : Pat<(v8f32 (uint_to_fp (v8i32 VR256X:$src1))), 8354 (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr 8355 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), 8356 VR256X:$src1, sub_ymm)))), sub_ymm)>; 8357 8358def : Pat<(v4f32 (uint_to_fp (v4i32 VR128X:$src1))), 8359 (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr 8360 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), 8361 VR128X:$src1, sub_xmm)))), sub_xmm)>; 8362 8363def : Pat<(v4f64 (uint_to_fp (v4i32 VR128X:$src1))), 8364 (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr 8365 (v8i32 (INSERT_SUBREG (IMPLICIT_DEF), 8366 VR128X:$src1, sub_xmm)))), sub_ymm)>; 8367 8368def : Pat<(v2f64 (X86VUintToFP (v4i32 VR128X:$src1))), 8369 (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr 8370 (v8i32 (INSERT_SUBREG (IMPLICIT_DEF), 8371 VR128X:$src1, sub_xmm)))), sub_xmm)>; 8372} 8373 8374let Predicates = [HasVLX] in { 8375 def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))), 8376 (VCVTDQ2PDZ128rm addr:$src)>; 8377 def : Pat<(v2f64 (vselect VK2WM:$mask, 8378 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))), 8379 VR128X:$src0)), 8380 (VCVTDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8381 def : Pat<(v2f64 (vselect VK2WM:$mask, 8382 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))), 8383 v2f64x_info.ImmAllZerosV)), 8384 (VCVTDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>; 8385 8386 def : Pat<(v2f64 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))), 8387 (VCVTUDQ2PDZ128rm addr:$src)>; 8388 def : Pat<(v2f64 (vselect VK2WM:$mask, 8389 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))), 8390 VR128X:$src0)), 8391 (VCVTUDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8392 def : Pat<(v2f64 (vselect VK2WM:$mask, 8393 (X86VUintToFP (bc_v4i32 (v2i64 
(X86vzload64 addr:$src)))), 8394 v2f64x_info.ImmAllZerosV)), 8395 (VCVTUDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>; 8396} 8397 8398let Predicates = [HasDQI, HasVLX] in { 8399 // Special patterns to allow use of X86VMSintToFP for masking. Instruction 8400 // patterns have been disabled with null_frag. 8401 def : Pat<(v4f32 (X86VSintToFP (v2i64 VR128X:$src))), 8402 (VCVTQQ2PSZ128rr VR128X:$src)>; 8403 def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), (v4f32 VR128X:$src0), 8404 VK2WM:$mask), 8405 (VCVTQQ2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>; 8406 def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), v4f32x_info.ImmAllZerosV, 8407 VK2WM:$mask), 8408 (VCVTQQ2PSZ128rrkz VK2WM:$mask, VR128X:$src)>; 8409 8410 def : Pat<(v4f32 (X86VSintToFP (loadv2i64 addr:$src))), 8411 (VCVTQQ2PSZ128rm addr:$src)>; 8412 def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), (v4f32 VR128X:$src0), 8413 VK2WM:$mask), 8414 (VCVTQQ2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8415 def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), v4f32x_info.ImmAllZerosV, 8416 VK2WM:$mask), 8417 (VCVTQQ2PSZ128rmkz VK2WM:$mask, addr:$src)>; 8418 8419 def : Pat<(v4f32 (X86VSintToFP (v2i64 (X86VBroadcastld64 addr:$src)))), 8420 (VCVTQQ2PSZ128rmb addr:$src)>; 8421 def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)), 8422 (v4f32 VR128X:$src0), VK2WM:$mask), 8423 (VCVTQQ2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8424 def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)), 8425 v4f32x_info.ImmAllZerosV, VK2WM:$mask), 8426 (VCVTQQ2PSZ128rmbkz VK2WM:$mask, addr:$src)>; 8427 8428 // Special patterns to allow use of X86VMUintToFP for masking. Instruction 8429 // patterns have been disabled with null_frag. 8430 def : Pat<(v4f32 (X86VUintToFP (v2i64 VR128X:$src))), 8431 (VCVTUQQ2PSZ128rr VR128X:$src)>; 8432 def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), (v4f32 VR128X:$src0), 8433 VK2WM:$mask), 8434 (VCVTUQQ2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>; 8435 def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), v4f32x_info.ImmAllZerosV, 8436 VK2WM:$mask), 8437 (VCVTUQQ2PSZ128rrkz VK2WM:$mask, VR128X:$src)>; 8438 8439 def : Pat<(v4f32 (X86VUintToFP (loadv2i64 addr:$src))), 8440 (VCVTUQQ2PSZ128rm addr:$src)>; 8441 def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), (v4f32 VR128X:$src0), 8442 VK2WM:$mask), 8443 (VCVTUQQ2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8444 def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), v4f32x_info.ImmAllZerosV, 8445 VK2WM:$mask), 8446 (VCVTUQQ2PSZ128rmkz VK2WM:$mask, addr:$src)>; 8447 8448 def : Pat<(v4f32 (X86VUintToFP (v2i64 (X86VBroadcastld64 addr:$src)))), 8449 (VCVTUQQ2PSZ128rmb addr:$src)>; 8450 def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)), 8451 (v4f32 VR128X:$src0), VK2WM:$mask), 8452 (VCVTUQQ2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8453 def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)), 8454 v4f32x_info.ImmAllZerosV, VK2WM:$mask), 8455 (VCVTUQQ2PSZ128rmbkz VK2WM:$mask, addr:$src)>; 8456} 8457 8458let Predicates = [HasDQI, NoVLX] in { 8459def : Pat<(v2i64 (X86cvttp2si (v2f64 VR128X:$src1))), 8460 (EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr 8461 (v8f64 (INSERT_SUBREG (IMPLICIT_DEF), 8462 VR128X:$src1, sub_xmm)))), sub_xmm)>; 8463 8464def : Pat<(v4i64 (X86cvttp2si (v4f32 VR128X:$src1))), 8465 (EXTRACT_SUBREG (v8i64 (VCVTTPS2QQZrr 8466 (v8f32 (INSERT_SUBREG (IMPLICIT_DEF), 8467 VR128X:$src1, sub_xmm)))), sub_ymm)>; 8468 8469def : Pat<(v4i64 (X86cvttp2si (v4f64 VR256X:$src1))), 8470 (EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr 8471 (v8f64 (INSERT_SUBREG (IMPLICIT_DEF), 
8472 VR256X:$src1, sub_ymm)))), sub_ymm)>; 8473 8474def : Pat<(v2i64 (X86cvttp2ui (v2f64 VR128X:$src1))), 8475 (EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr 8476 (v8f64 (INSERT_SUBREG (IMPLICIT_DEF), 8477 VR128X:$src1, sub_xmm)))), sub_xmm)>; 8478 8479def : Pat<(v4i64 (X86cvttp2ui (v4f32 VR128X:$src1))), 8480 (EXTRACT_SUBREG (v8i64 (VCVTTPS2UQQZrr 8481 (v8f32 (INSERT_SUBREG (IMPLICIT_DEF), 8482 VR128X:$src1, sub_xmm)))), sub_ymm)>; 8483 8484def : Pat<(v4i64 (X86cvttp2ui (v4f64 VR256X:$src1))), 8485 (EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr 8486 (v8f64 (INSERT_SUBREG (IMPLICIT_DEF), 8487 VR256X:$src1, sub_ymm)))), sub_ymm)>; 8488 8489def : Pat<(v4f32 (sint_to_fp (v4i64 VR256X:$src1))), 8490 (EXTRACT_SUBREG (v8f32 (VCVTQQ2PSZrr 8491 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), 8492 VR256X:$src1, sub_ymm)))), sub_xmm)>; 8493 8494def : Pat<(v2f64 (sint_to_fp (v2i64 VR128X:$src1))), 8495 (EXTRACT_SUBREG (v8f64 (VCVTQQ2PDZrr 8496 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), 8497 VR128X:$src1, sub_xmm)))), sub_xmm)>; 8498 8499def : Pat<(v4f64 (sint_to_fp (v4i64 VR256X:$src1))), 8500 (EXTRACT_SUBREG (v8f64 (VCVTQQ2PDZrr 8501 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), 8502 VR256X:$src1, sub_ymm)))), sub_ymm)>; 8503 8504def : Pat<(v4f32 (uint_to_fp (v4i64 VR256X:$src1))), 8505 (EXTRACT_SUBREG (v8f32 (VCVTUQQ2PSZrr 8506 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), 8507 VR256X:$src1, sub_ymm)))), sub_xmm)>; 8508 8509def : Pat<(v2f64 (uint_to_fp (v2i64 VR128X:$src1))), 8510 (EXTRACT_SUBREG (v8f64 (VCVTUQQ2PDZrr 8511 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), 8512 VR128X:$src1, sub_xmm)))), sub_xmm)>; 8513 8514def : Pat<(v4f64 (uint_to_fp (v4i64 VR256X:$src1))), 8515 (EXTRACT_SUBREG (v8f64 (VCVTUQQ2PDZrr 8516 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), 8517 VR256X:$src1, sub_ymm)))), sub_ymm)>; 8518} 8519 8520//===----------------------------------------------------------------------===// 8521// Half precision conversion instructions 8522//===----------------------------------------------------------------------===// 8523 8524multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src, 8525 X86MemOperand x86memop, PatFrag ld_frag, 8526 X86FoldableSchedWrite sched> { 8527 defm rr : AVX512_maskable<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst), 8528 (ins _src.RC:$src), "vcvtph2ps", "$src", "$src", 8529 (X86cvtph2ps (_src.VT _src.RC:$src))>, 8530 T8PD, Sched<[sched]>; 8531 defm rm : AVX512_maskable<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst), 8532 (ins x86memop:$src), "vcvtph2ps", "$src", "$src", 8533 (X86cvtph2ps (_src.VT 8534 (ld_frag addr:$src)))>, 8535 T8PD, Sched<[sched.Folded]>; 8536} 8537 8538multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src, 8539 X86FoldableSchedWrite sched> { 8540 defm rrb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst), 8541 (ins _src.RC:$src), "vcvtph2ps", 8542 "{sae}, $src", "$src, {sae}", 8543 (X86cvtph2psSAE (_src.VT _src.RC:$src))>, 8544 T8PD, EVEX_B, Sched<[sched]>; 8545} 8546 8547let Predicates = [HasAVX512] in 8548 defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem, load, 8549 WriteCvtPH2PSZ>, 8550 avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSZ>, 8551 EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>; 8552 8553let Predicates = [HasVLX] in { 8554 defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem, 8555 load, WriteCvtPH2PSY>, EVEX, EVEX_V256, 8556 EVEX_CD8<32, CD8VH>; 8557 defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem, 8558 load, WriteCvtPH2PS>, EVEX, EVEX_V128, 8559 EVEX_CD8<32, CD8VH>; 8560 8561 // 
Pattern match vcvtph2ps of a scalar i64 load. 8562 def : Pat<(v4f32 (X86cvtph2ps (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))), 8563 (VCVTPH2PSZ128rm addr:$src)>; 8564 def : Pat<(v4f32 (X86cvtph2ps (v8i16 (bitconvert 8565 (v2i64 (scalar_to_vector (loadi64 addr:$src))))))), 8566 (VCVTPH2PSZ128rm addr:$src)>; 8567} 8568 8569multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src, 8570 X86MemOperand x86memop, SchedWrite RR, SchedWrite MR> { 8571let ExeDomain = GenericDomain in { 8572 def rr : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst), 8573 (ins _src.RC:$src1, i32u8imm:$src2), 8574 "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", 8575 [(set _dest.RC:$dst, 8576 (X86cvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>, 8577 Sched<[RR]>; 8578 let Constraints = "$src0 = $dst" in 8579 def rrk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst), 8580 (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2), 8581 "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", 8582 [(set _dest.RC:$dst, 8583 (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2), 8584 _dest.RC:$src0, _src.KRCWM:$mask))]>, 8585 Sched<[RR]>, EVEX_K; 8586 def rrkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst), 8587 (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2), 8588 "vcvtps2ph\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}", 8589 [(set _dest.RC:$dst, 8590 (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2), 8591 _dest.ImmAllZerosV, _src.KRCWM:$mask))]>, 8592 Sched<[RR]>, EVEX_KZ; 8593 let hasSideEffects = 0, mayStore = 1 in { 8594 def mr : AVX512AIi8<0x1D, MRMDestMem, (outs), 8595 (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2), 8596 "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, 8597 Sched<[MR]>; 8598 def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs), 8599 (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2), 8600 "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", []>, 8601 EVEX_K, Sched<[MR]>, NotMemoryFoldable; 8602 } 8603} 8604} 8605 8606multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src, 8607 SchedWrite Sched> { 8608 let hasSideEffects = 0 in 8609 defm rrb : AVX512_maskable_in_asm<0x1D, MRMDestReg, _dest, 8610 (outs _dest.RC:$dst), 8611 (ins _src.RC:$src1, i32u8imm:$src2), 8612 "vcvtps2ph", "$src2, {sae}, $src1", "$src1, {sae}, $src2", []>, 8613 EVEX_B, AVX512AIi8Base, Sched<[Sched]>; 8614} 8615 8616let Predicates = [HasAVX512] in { 8617 defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem, 8618 WriteCvtPS2PHZ, WriteCvtPS2PHZSt>, 8619 avx512_cvtps2ph_sae<v16i16x_info, v16f32_info, WriteCvtPS2PHZ>, 8620 EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>; 8621 let Predicates = [HasVLX] in { 8622 defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem, 8623 WriteCvtPS2PHY, WriteCvtPS2PHYSt>, 8624 EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>; 8625 defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem, 8626 WriteCvtPS2PH, WriteCvtPS2PHSt>, 8627 EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>; 8628 } 8629 8630 def : Pat<(store (f64 (extractelt 8631 (bc_v2f64 (v8i16 (X86cvtps2ph VR128X:$src1, timm:$src2))), 8632 (iPTR 0))), addr:$dst), 8633 (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>; 8634 def : Pat<(store (i64 (extractelt 8635 (bc_v2i64 (v8i16 (X86cvtps2ph VR128X:$src1, timm:$src2))), 8636 (iPTR 0))), addr:$dst), 8637 (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>; 8638 def : Pat<(store (v8i16 (X86cvtps2ph VR256X:$src1, 
timm:$src2)), addr:$dst), 8639 (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, timm:$src2)>; 8640 def : Pat<(store (v16i16 (X86cvtps2ph VR512:$src1, timm:$src2)), addr:$dst), 8641 (VCVTPS2PHZmr addr:$dst, VR512:$src1, timm:$src2)>; 8642} 8643 8644// Patterns for matching conversions from float to half-float and vice versa. 8645let Predicates = [HasVLX] in { 8646 // Use MXCSR.RC for rounding instead of explicitly specifying the default 8647 // rounding mode (Nearest-Even, encoded as 0). Both are equivalent in the 8648 // configurations we support (the default). However, falling back to MXCSR is 8649 // more consistent with other instructions, which are always controlled by it. 8650 // It's encoded as 0b100. 8651 def : Pat<(fp_to_f16 FR32X:$src), 8652 (i16 (EXTRACT_SUBREG (VMOVPDI2DIZrr (v8i16 (VCVTPS2PHZ128rr 8653 (v4f32 (COPY_TO_REGCLASS FR32X:$src, VR128X)), 4))), sub_16bit))>; 8654 8655 def : Pat<(f16_to_fp GR16:$src), 8656 (f32 (COPY_TO_REGCLASS (v4f32 (VCVTPH2PSZ128rr 8657 (v8i16 (COPY_TO_REGCLASS (MOVSX32rr16 GR16:$src), VR128X)))), FR32X)) >; 8658 8659 def : Pat<(f16_to_fp (i16 (fp_to_f16 FR32X:$src))), 8660 (f32 (COPY_TO_REGCLASS (v4f32 (VCVTPH2PSZ128rr 8661 (v8i16 (VCVTPS2PHZ128rr 8662 (v4f32 (COPY_TO_REGCLASS FR32X:$src, VR128X)), 4)))), FR32X)) >; 8663} 8664 8665// Unordered/Ordered scalar fp compare with Sae and set EFLAGS 8666multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _, 8667 string OpcodeStr, X86FoldableSchedWrite sched> { 8668 let hasSideEffects = 0 in 8669 def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2), 8670 !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"), []>, 8671 EVEX, EVEX_B, VEX_LIG, EVEX_V128, Sched<[sched]>; 8672} 8673 8674let Defs = [EFLAGS], Predicates = [HasAVX512] in { 8675 defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", WriteFCom>, 8676 AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>; 8677 defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", WriteFCom>, 8678 AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>; 8679 defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", WriteFCom>, 8680 AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>; 8681 defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", WriteFCom>, 8682 AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>; 8683} 8684 8685let Defs = [EFLAGS], Predicates = [HasAVX512] in { 8686 defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32, 8687 "ucomiss", WriteFCom>, PS, EVEX, VEX_LIG, 8688 EVEX_CD8<32, CD8VT1>; 8689 defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86cmp, f64, f64mem, loadf64, 8690 "ucomisd", WriteFCom>, PD, EVEX, 8691 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; 8692 let Pattern = []<dag> in { 8693 defm VCOMISSZ : sse12_ord_cmp<0x2F, FR32X, undef, f32, f32mem, loadf32, 8694 "comiss", WriteFCom>, PS, EVEX, VEX_LIG, 8695 EVEX_CD8<32, CD8VT1>; 8696 defm VCOMISDZ : sse12_ord_cmp<0x2F, FR64X, undef, f64, f64mem, loadf64, 8697 "comisd", WriteFCom>, PD, EVEX, 8698 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; 8699 } 8700 let isCodeGenOnly = 1 in { 8701 defm VUCOMISSZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem, 8702 sse_load_f32, "ucomiss", WriteFCom>, PS, EVEX, VEX_LIG, 8703 EVEX_CD8<32, CD8VT1>; 8704 defm VUCOMISDZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem, 8705 sse_load_f64, "ucomisd", WriteFCom>, PD, EVEX, 8706 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; 8707 8708 defm VCOMISSZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem, 8709 sse_load_f32, "comiss", WriteFCom>, PS, EVEX, VEX_LIG, 8710 EVEX_CD8<32, 
CD8VT1>; 8711 defm VCOMISDZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem, 8712 sse_load_f64, "comisd", WriteFCom>, PD, EVEX, 8713 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; 8714 } 8715} 8716 8717/// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd 8718multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode, 8719 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 8720 let Predicates = [HasAVX512], ExeDomain = _.ExeDomain in { 8721 defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 8722 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 8723 "$src2, $src1", "$src1, $src2", 8724 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>, 8725 EVEX_4V, VEX_LIG, Sched<[sched]>; 8726 defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), 8727 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr, 8728 "$src2, $src1", "$src1, $src2", 8729 (OpNode (_.VT _.RC:$src1), 8730 _.ScalarIntMemCPat:$src2)>, EVEX_4V, VEX_LIG, 8731 Sched<[sched.Folded, sched.ReadAfterFold]>; 8732} 8733} 8734 8735defm VRCP14SSZ : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SchedWriteFRcp.Scl, 8736 f32x_info>, EVEX_CD8<32, CD8VT1>, 8737 T8PD; 8738defm VRCP14SDZ : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SchedWriteFRcp.Scl, 8739 f64x_info>, VEX_W, EVEX_CD8<64, CD8VT1>, 8740 T8PD; 8741defm VRSQRT14SSZ : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s, 8742 SchedWriteFRsqrt.Scl, f32x_info>, 8743 EVEX_CD8<32, CD8VT1>, T8PD; 8744defm VRSQRT14SDZ : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s, 8745 SchedWriteFRsqrt.Scl, f64x_info>, VEX_W, 8746 EVEX_CD8<64, CD8VT1>, T8PD; 8747 8748/// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd 8749multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode, 8750 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 8751 let ExeDomain = _.ExeDomain in { 8752 defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 8753 (ins _.RC:$src), OpcodeStr, "$src", "$src", 8754 (_.VT (OpNode _.RC:$src))>, EVEX, T8PD, 8755 Sched<[sched]>; 8756 defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 8757 (ins _.MemOp:$src), OpcodeStr, "$src", "$src", 8758 (OpNode (_.VT 8759 (bitconvert (_.LdFrag addr:$src))))>, EVEX, T8PD, 8760 Sched<[sched.Folded, sched.ReadAfterFold]>; 8761 defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 8762 (ins _.ScalarMemOp:$src), OpcodeStr, 8763 "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr, 8764 (OpNode (_.VT 8765 (_.BroadcastLdFrag addr:$src)))>, 8766 EVEX, T8PD, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 8767 } 8768} 8769 8770multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode, 8771 X86SchedWriteWidths sched> { 8772 defm PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"), OpNode, sched.ZMM, 8773 v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>; 8774 defm PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"), OpNode, sched.ZMM, 8775 v8f64_info>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; 8776 8777 // Define only if AVX512VL feature is present. 
8778 let Predicates = [HasVLX] in { 8779 defm PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"), 8780 OpNode, sched.XMM, v4f32x_info>, 8781 EVEX_V128, EVEX_CD8<32, CD8VF>; 8782 defm PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"), 8783 OpNode, sched.YMM, v8f32x_info>, 8784 EVEX_V256, EVEX_CD8<32, CD8VF>; 8785 defm PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"), 8786 OpNode, sched.XMM, v2f64x_info>, 8787 EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>; 8788 defm PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"), 8789 OpNode, sched.YMM, v4f64x_info>, 8790 EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>; 8791 } 8792} 8793 8794defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86rsqrt14, SchedWriteFRsqrt>; 8795defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86rcp14, SchedWriteFRcp>; 8796 8797/// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd 8798multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _, 8799 SDNode OpNode, SDNode OpNodeSAE, 8800 X86FoldableSchedWrite sched> { 8801 let ExeDomain = _.ExeDomain in { 8802 defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 8803 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 8804 "$src2, $src1", "$src1, $src2", 8805 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>, 8806 Sched<[sched]>; 8807 8808 defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 8809 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 8810 "{sae}, $src2, $src1", "$src1, $src2, {sae}", 8811 (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2))>, 8812 EVEX_B, Sched<[sched]>; 8813 8814 defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), 8815 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr, 8816 "$src2, $src1", "$src1, $src2", 8817 (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2)>, 8818 Sched<[sched.Folded, sched.ReadAfterFold]>; 8819 } 8820} 8821 8822multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode, 8823 SDNode OpNodeSAE, X86FoldableSchedWrite sched> { 8824 defm SSZ : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, OpNodeSAE, 8825 sched>, EVEX_CD8<32, CD8VT1>, VEX_LIG; 8826 defm SDZ : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, OpNodeSAE, 8827 sched>, EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W; 8828} 8829 8830let Predicates = [HasERI] in { 8831 defm VRCP28 : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, X86rcp28SAEs, 8832 SchedWriteFRcp.Scl>, T8PD, EVEX_4V; 8833 defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, X86rsqrt28SAEs, 8834 SchedWriteFRsqrt.Scl>, T8PD, EVEX_4V; 8835} 8836 8837defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs, 8838 SchedWriteFRnd.Scl>, T8PD, EVEX_4V; 8839/// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd 8840 8841multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, 8842 SDNode OpNode, X86FoldableSchedWrite sched> { 8843 let ExeDomain = _.ExeDomain in { 8844 defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 8845 (ins _.RC:$src), OpcodeStr, "$src", "$src", 8846 (OpNode (_.VT _.RC:$src))>, 8847 Sched<[sched]>; 8848 8849 defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 8850 (ins _.MemOp:$src), OpcodeStr, "$src", "$src", 8851 (OpNode (_.VT 8852 (bitconvert (_.LdFrag addr:$src))))>, 8853 Sched<[sched.Folded, sched.ReadAfterFold]>; 8854 8855 defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 8856 (ins _.ScalarMemOp:$src), OpcodeStr, 8857 "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr, 8858 (OpNode (_.VT 8859 (_.BroadcastLdFrag addr:$src)))>, 8860 EVEX_B, 
Sched<[sched.Folded, sched.ReadAfterFold]>; 8861 } 8862} 8863multiclass avx512_fp28_p_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, 8864 SDNode OpNode, X86FoldableSchedWrite sched> { 8865 let ExeDomain = _.ExeDomain in 8866 defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 8867 (ins _.RC:$src), OpcodeStr, 8868 "{sae}, $src", "$src, {sae}", 8869 (OpNode (_.VT _.RC:$src))>, 8870 EVEX_B, Sched<[sched]>; 8871} 8872 8873multiclass avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode, 8874 SDNode OpNodeSAE, X86SchedWriteWidths sched> { 8875 defm PSZ : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>, 8876 avx512_fp28_p_sae<opc, OpcodeStr#"ps", v16f32_info, OpNodeSAE, sched.ZMM>, 8877 T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>; 8878 defm PDZ : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>, 8879 avx512_fp28_p_sae<opc, OpcodeStr#"pd", v8f64_info, OpNodeSAE, sched.ZMM>, 8880 T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; 8881} 8882 8883multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr, 8884 SDNode OpNode, X86SchedWriteWidths sched> { 8885 // Define only if AVX512VL feature is present. 8886 let Predicates = [HasVLX] in { 8887 defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode, 8888 sched.XMM>, 8889 EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>; 8890 defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode, 8891 sched.YMM>, 8892 EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>; 8893 defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode, 8894 sched.XMM>, 8895 EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>; 8896 defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode, 8897 sched.YMM>, 8898 EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>; 8899 } 8900} 8901 8902let Predicates = [HasERI] in { 8903 defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, X86rsqrt28SAE, 8904 SchedWriteFRsqrt>, EVEX; 8905 defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28, X86rcp28SAE, 8906 SchedWriteFRcp>, EVEX; 8907 defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2, X86exp2SAE, 8908 SchedWriteFAdd>, EVEX; 8909} 8910defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE, 8911 SchedWriteFRnd>, 8912 avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexp, 8913 SchedWriteFRnd>, EVEX; 8914 8915multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr, 8916 X86FoldableSchedWrite sched, X86VectorVTInfo _>{ 8917 let ExeDomain = _.ExeDomain in 8918 defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 8919 (ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc", 8920 (_.VT (X86fsqrtRnd _.RC:$src, (i32 timm:$rc)))>, 8921 EVEX, EVEX_B, EVEX_RC, Sched<[sched]>; 8922} 8923 8924multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr, 8925 X86FoldableSchedWrite sched, X86VectorVTInfo _>{ 8926 let ExeDomain = _.ExeDomain in { 8927 defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 8928 (ins _.RC:$src), OpcodeStr, "$src", "$src", 8929 (_.VT (fsqrt _.RC:$src))>, EVEX, 8930 Sched<[sched]>; 8931 defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 8932 (ins _.MemOp:$src), OpcodeStr, "$src", "$src", 8933 (fsqrt (_.VT 8934 (bitconvert (_.LdFrag addr:$src))))>, EVEX, 8935 Sched<[sched.Folded, sched.ReadAfterFold]>; 8936 defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 8937 (ins _.ScalarMemOp:$src), OpcodeStr, 8938 "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr, 8939 (fsqrt (_.VT 8940 (_.BroadcastLdFrag addr:$src)))>, 8941 EVEX, EVEX_B, Sched<[sched.Folded, 
sched.ReadAfterFold]>; 8942 } 8943} 8944 8945multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr, 8946 X86SchedWriteSizes sched> { 8947 defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"), 8948 sched.PS.ZMM, v16f32_info>, 8949 EVEX_V512, PS, EVEX_CD8<32, CD8VF>; 8950 defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"), 8951 sched.PD.ZMM, v8f64_info>, 8952 EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>; 8953 // Define only if AVX512VL feature is present. 8954 let Predicates = [HasVLX] in { 8955 defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"), 8956 sched.PS.XMM, v4f32x_info>, 8957 EVEX_V128, PS, EVEX_CD8<32, CD8VF>; 8958 defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"), 8959 sched.PS.YMM, v8f32x_info>, 8960 EVEX_V256, PS, EVEX_CD8<32, CD8VF>; 8961 defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"), 8962 sched.PD.XMM, v2f64x_info>, 8963 EVEX_V128, VEX_W, PD, EVEX_CD8<64, CD8VF>; 8964 defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"), 8965 sched.PD.YMM, v4f64x_info>, 8966 EVEX_V256, VEX_W, PD, EVEX_CD8<64, CD8VF>; 8967 } 8968} 8969 8970multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr, 8971 X86SchedWriteSizes sched> { 8972 defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"), 8973 sched.PS.ZMM, v16f32_info>, 8974 EVEX_V512, PS, EVEX_CD8<32, CD8VF>; 8975 defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"), 8976 sched.PD.ZMM, v8f64_info>, 8977 EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>; 8978} 8979 8980multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched, 8981 X86VectorVTInfo _, string Name> { 8982 let ExeDomain = _.ExeDomain in { 8983 defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 8984 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 8985 "$src2, $src1", "$src1, $src2", 8986 (X86fsqrts (_.VT _.RC:$src1), 8987 (_.VT _.RC:$src2))>, 8988 Sched<[sched]>; 8989 defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), 8990 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr, 8991 "$src2, $src1", "$src1, $src2", 8992 (X86fsqrts (_.VT _.RC:$src1), 8993 _.ScalarIntMemCPat:$src2)>, 8994 Sched<[sched.Folded, sched.ReadAfterFold]>; 8995 defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 8996 (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr, 8997 "$rc, $src2, $src1", "$src1, $src2, $rc", 8998 (X86fsqrtRnds (_.VT _.RC:$src1), 8999 (_.VT _.RC:$src2), 9000 (i32 timm:$rc))>, 9001 EVEX_B, EVEX_RC, Sched<[sched]>; 9002 9003 let isCodeGenOnly = 1, hasSideEffects = 0, Predicates=[HasAVX512] in { 9004 def r : I<opc, MRMSrcReg, (outs _.FRC:$dst), 9005 (ins _.FRC:$src1, _.FRC:$src2), 9006 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, 9007 Sched<[sched]>; 9008 let mayLoad = 1 in 9009 def m : I<opc, MRMSrcMem, (outs _.FRC:$dst), 9010 (ins _.FRC:$src1, _.ScalarMemOp:$src2), 9011 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, 9012 Sched<[sched.Folded, sched.ReadAfterFold]>; 9013 } 9014 } 9015 9016 let Predicates = [HasAVX512] in { 9017 def : Pat<(_.EltVT (fsqrt _.FRC:$src)), 9018 (!cast<Instruction>(Name#Zr) 9019 (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>; 9020 } 9021 9022 let Predicates = [HasAVX512, OptForSize] in { 9023 def : Pat<(_.EltVT (fsqrt (load addr:$src))), 9024 (!cast<Instruction>(Name#Zm) 9025 (_.EltVT (IMPLICIT_DEF)), addr:$src)>; 9026 } 9027} 9028 9029multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr, 9030 X86SchedWriteSizes sched> 
{ 9031 defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", sched.PS.Scl, f32x_info, NAME#"SS">, 9032 EVEX_CD8<32, CD8VT1>, EVEX_4V, XS; 9033 defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", sched.PD.Scl, f64x_info, NAME#"SD">, 9034 EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W; 9035} 9036 9037defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, 9038 avx512_sqrt_packed_all_round<0x51, "vsqrt", SchedWriteFSqrtSizes>; 9039 9040defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, VEX_LIG; 9041 9042multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr, 9043 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 9044 let ExeDomain = _.ExeDomain in { 9045 defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 9046 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr, 9047 "$src3, $src2, $src1", "$src1, $src2, $src3", 9048 (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2), 9049 (i32 timm:$src3)))>, 9050 Sched<[sched]>; 9051 9052 defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 9053 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr, 9054 "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3", 9055 (_.VT (X86RndScalesSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2), 9056 (i32 timm:$src3)))>, EVEX_B, 9057 Sched<[sched]>; 9058 9059 defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), 9060 (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3), 9061 OpcodeStr, 9062 "$src3, $src2, $src1", "$src1, $src2, $src3", 9063 (_.VT (X86RndScales _.RC:$src1, 9064 _.ScalarIntMemCPat:$src2, (i32 timm:$src3)))>, 9065 Sched<[sched.Folded, sched.ReadAfterFold]>; 9066 9067 let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in { 9068 def r : I<opc, MRMSrcReg, (outs _.FRC:$dst), 9069 (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3), 9070 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 9071 []>, Sched<[sched]>; 9072 9073 let mayLoad = 1 in 9074 def m : I<opc, MRMSrcMem, (outs _.FRC:$dst), 9075 (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3), 9076 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 9077 []>, Sched<[sched.Folded, sched.ReadAfterFold]>; 9078 } 9079 } 9080 9081 let Predicates = [HasAVX512] in { 9082 def : Pat<(X86VRndScale _.FRC:$src1, timm:$src2), 9083 (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)), 9084 _.FRC:$src1, timm:$src2))>; 9085 } 9086 9087 let Predicates = [HasAVX512, OptForSize] in { 9088 def : Pat<(X86VRndScale (_.ScalarLdFrag addr:$src1), timm:$src2), 9089 (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)), 9090 addr:$src1, timm:$src2))>; 9091 } 9092} 9093 9094defm VRNDSCALESSZ : avx512_rndscale_scalar<0x0A, "vrndscaless", 9095 SchedWriteFRnd.Scl, f32x_info>, 9096 AVX512AIi8Base, EVEX_4V, VEX_LIG, 9097 EVEX_CD8<32, CD8VT1>; 9098 9099defm VRNDSCALESDZ : avx512_rndscale_scalar<0x0B, "vrndscalesd", 9100 SchedWriteFRnd.Scl, f64x_info>, 9101 VEX_W, AVX512AIi8Base, EVEX_4V, VEX_LIG, 9102 EVEX_CD8<64, CD8VT1>; 9103 9104multiclass avx512_masked_scalar<SDNode OpNode, string OpcPrefix, SDNode Move, 9105 dag Mask, X86VectorVTInfo _, PatLeaf ZeroFP, 9106 dag OutMask, Predicate BasePredicate> { 9107 let Predicates = [BasePredicate] in { 9108 def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects Mask, 9109 (OpNode (extractelt _.VT:$src2, (iPTR 0))), 9110 (extractelt _.VT:$dst, (iPTR 0))))), 9111 (!cast<Instruction>("V"#OpcPrefix#r_Intk) 9112 _.VT:$dst, OutMask, _.VT:$src2, _.VT:$src1)>; 9113 9114 def : 
Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects Mask,
                       (OpNode (extractelt _.VT:$src2, (iPTR 0))),
                       ZeroFP))),
      (!cast<Instruction>("V"#OpcPrefix#r_Intkz)
          OutMask, _.VT:$src2, _.VT:$src1)>;
  }
}

defm : avx512_masked_scalar<fsqrt, "SQRTSSZ", X86Movss,
                            (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v4f32x_info,
                            fp32imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
defm : avx512_masked_scalar<fsqrt, "SQRTSDZ", X86Movsd,
                            (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v2f64x_info,
                            fp64imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;


//-------------------------------------------------
// Integer truncate and extend operations
//-------------------------------------------------

// PatFrags that contain a select and a truncate op. They take operands in the
// same order as X86vmtrunc, X86vmtruncs, X86vmtruncus. This allows us to pass
// either to the multiclasses.
def select_trunc   : PatFrag<(ops node:$src, node:$src0, node:$mask),
                             (vselect node:$mask,
                                      (trunc node:$src), node:$src0)>;
def select_truncs  : PatFrag<(ops node:$src, node:$src0, node:$mask),
                             (vselect node:$mask,
                                      (X86vtruncs node:$src), node:$src0)>;
def select_truncus : PatFrag<(ops node:$src, node:$src0, node:$mask),
                             (vselect node:$mask,
                                      (X86vtruncus node:$src), node:$src0)>;

multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               SDPatternOperator MaskNode,
                               X86FoldableSchedWrite sched, X86VectorVTInfo SrcInfo,
                               X86VectorVTInfo DestInfo, X86MemOperand x86memop> {
  let ExeDomain = DestInfo.ExeDomain in {
  def rr   : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
                        (ins SrcInfo.RC:$src),
                        OpcodeStr # "\t{$src, $dst|$dst, $src}",
                        [(set DestInfo.RC:$dst,
                              (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src))))]>,
                        EVEX, Sched<[sched]>;
  let Constraints = "$src0 = $dst" in
  def rrk  : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
                        (ins DestInfo.RC:$src0, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
                        OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                        [(set DestInfo.RC:$dst,
                              (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
                                        (DestInfo.VT DestInfo.RC:$src0),
                                        SrcInfo.KRCWM:$mask))]>,
                        EVEX, EVEX_K, Sched<[sched]>;
  def rrkz : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
                        (ins SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
                        OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                        [(set DestInfo.RC:$dst,
                              (DestInfo.VT (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
                                           DestInfo.ImmAllZerosV, SrcInfo.KRCWM:$mask)))]>,
                        EVEX, EVEX_KZ, Sched<[sched]>;
  }

  let mayStore = 1, hasSideEffects = 0, ExeDomain = DestInfo.ExeDomain in {
    def mr  : AVX512XS8I<opc, MRMDestMem, (outs),
                         (ins x86memop:$dst, SrcInfo.RC:$src),
                         OpcodeStr # "\t{$src, $dst|$dst, $src}", []>,
                         EVEX, Sched<[sched.Folded]>;

    def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
                         (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
                         OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", []>,
                         EVEX, EVEX_K, Sched<[sched.Folded]>, NotMemoryFoldable;
  } // mayStore = 1, hasSideEffects = 0
}

multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
                                    X86VectorVTInfo DestInfo,
                                    PatFrag truncFrag, PatFrag mtruncFrag,
                                    string Name> {

  def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst),
(!cast<Instruction>(Name#SrcInfo.ZSuffix##mr) 9196 addr:$dst, SrcInfo.RC:$src)>; 9197 9198 def : Pat<(mtruncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst, 9199 SrcInfo.KRCWM:$mask), 9200 (!cast<Instruction>(Name#SrcInfo.ZSuffix##mrk) 9201 addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>; 9202} 9203 9204multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128, 9205 SDNode OpNode256, SDNode OpNode512, 9206 SDPatternOperator MaskNode128, 9207 SDPatternOperator MaskNode256, 9208 SDPatternOperator MaskNode512, 9209 X86FoldableSchedWrite sched, 9210 AVX512VLVectorVTInfo VTSrcInfo, 9211 X86VectorVTInfo DestInfoZ128, 9212 X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ, 9213 X86MemOperand x86memopZ128, X86MemOperand x86memopZ256, 9214 X86MemOperand x86memopZ, PatFrag truncFrag, 9215 PatFrag mtruncFrag, Predicate prd = HasAVX512>{ 9216 9217 let Predicates = [HasVLX, prd] in { 9218 defm Z128: avx512_trunc_common<opc, OpcodeStr, OpNode128, MaskNode128, sched, 9219 VTSrcInfo.info128, DestInfoZ128, x86memopZ128>, 9220 avx512_trunc_mr_lowering<VTSrcInfo.info128, DestInfoZ128, 9221 truncFrag, mtruncFrag, NAME>, EVEX_V128; 9222 9223 defm Z256: avx512_trunc_common<opc, OpcodeStr, OpNode256, MaskNode256, sched, 9224 VTSrcInfo.info256, DestInfoZ256, x86memopZ256>, 9225 avx512_trunc_mr_lowering<VTSrcInfo.info256, DestInfoZ256, 9226 truncFrag, mtruncFrag, NAME>, EVEX_V256; 9227 } 9228 let Predicates = [prd] in 9229 defm Z: avx512_trunc_common<opc, OpcodeStr, OpNode512, MaskNode512, sched, 9230 VTSrcInfo.info512, DestInfoZ, x86memopZ>, 9231 avx512_trunc_mr_lowering<VTSrcInfo.info512, DestInfoZ, 9232 truncFrag, mtruncFrag, NAME>, EVEX_V512; 9233} 9234 9235multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr, SDNode OpNode, 9236 SDPatternOperator MaskNode, 9237 X86FoldableSchedWrite sched, PatFrag StoreNode, 9238 PatFrag MaskedStoreNode, SDNode InVecNode, 9239 SDPatternOperator InVecMaskNode> { 9240 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, InVecNode, 9241 InVecMaskNode, InVecMaskNode, InVecMaskNode, sched, 9242 avx512vl_i64_info, v16i8x_info, v16i8x_info, 9243 v16i8x_info, i16mem, i32mem, i64mem, StoreNode, 9244 MaskedStoreNode>, EVEX_CD8<8, CD8VO>; 9245} 9246 9247multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode, 9248 SDPatternOperator MaskNode, 9249 X86FoldableSchedWrite sched, PatFrag StoreNode, 9250 PatFrag MaskedStoreNode, SDNode InVecNode, 9251 SDPatternOperator InVecMaskNode> { 9252 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode, 9253 InVecMaskNode, InVecMaskNode, MaskNode, sched, 9254 avx512vl_i64_info, v8i16x_info, v8i16x_info, 9255 v8i16x_info, i32mem, i64mem, i128mem, StoreNode, 9256 MaskedStoreNode>, EVEX_CD8<16, CD8VQ>; 9257} 9258 9259multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode, 9260 SDPatternOperator MaskNode, 9261 X86FoldableSchedWrite sched, PatFrag StoreNode, 9262 PatFrag MaskedStoreNode, SDNode InVecNode, 9263 SDPatternOperator InVecMaskNode> { 9264 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode, 9265 InVecMaskNode, MaskNode, MaskNode, sched, 9266 avx512vl_i64_info, v4i32x_info, v4i32x_info, 9267 v8i32x_info, i64mem, i128mem, i256mem, StoreNode, 9268 MaskedStoreNode>, EVEX_CD8<32, CD8VH>; 9269} 9270 9271multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode, 9272 SDPatternOperator MaskNode, 9273 X86FoldableSchedWrite sched, PatFrag StoreNode, 9274 PatFrag MaskedStoreNode, SDNode InVecNode, 9275 SDPatternOperator InVecMaskNode> { 9276 defm 
multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
                          InVecMaskNode, InVecMaskNode, MaskNode, sched,
                          avx512vl_i32_info, v16i8x_info, v16i8x_info,
                          v16i8x_info, i32mem, i64mem, i128mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<8, CD8VQ>;
}

multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
                          InVecMaskNode, MaskNode, MaskNode, sched,
                          avx512vl_i32_info, v8i16x_info, v8i16x_info,
                          v16i16x_info, i64mem, i128mem, i256mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<16, CD8VH>;
}

multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
                          InVecMaskNode, MaskNode, MaskNode, sched,
                          avx512vl_i16_info, v16i8x_info, v16i8x_info,
                          v32i8x_info, i64mem, i128mem, i256mem, StoreNode,
                          MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>;
}

defm VPMOVQB : avx512_trunc_qb<0x32, "vpmovqb", trunc, select_trunc,
                               WriteShuffle256, truncstorevi8,
                               masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
defm VPMOVSQB : avx512_trunc_qb<0x22, "vpmovsqb", X86vtruncs, select_truncs,
                                WriteShuffle256, truncstore_s_vi8,
                                masked_truncstore_s_vi8, X86vtruncs,
                                X86vmtruncs>;
defm VPMOVUSQB : avx512_trunc_qb<0x12, "vpmovusqb", X86vtruncus,
                                 select_truncus, WriteShuffle256,
                                 truncstore_us_vi8, masked_truncstore_us_vi8,
                                 X86vtruncus, X86vmtruncus>;

defm VPMOVQW : avx512_trunc_qw<0x34, "vpmovqw", trunc, select_trunc,
                               WriteShuffle256, truncstorevi16,
                               masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
defm VPMOVSQW : avx512_trunc_qw<0x24, "vpmovsqw", X86vtruncs, select_truncs,
                                WriteShuffle256, truncstore_s_vi16,
                                masked_truncstore_s_vi16, X86vtruncs,
                                X86vmtruncs>;
defm VPMOVUSQW : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus,
                                 select_truncus, WriteShuffle256,
                                 truncstore_us_vi16, masked_truncstore_us_vi16,
                                 X86vtruncus, X86vmtruncus>;

defm VPMOVQD : avx512_trunc_qd<0x35, "vpmovqd", trunc, select_trunc,
                               WriteShuffle256, truncstorevi32,
                               masked_truncstorevi32, X86vtrunc, X86vmtrunc>;
defm VPMOVSQD : avx512_trunc_qd<0x25, "vpmovsqd", X86vtruncs, select_truncs,
                                WriteShuffle256, truncstore_s_vi32,
                                masked_truncstore_s_vi32, X86vtruncs,
                                X86vmtruncs>;
defm VPMOVUSQD : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus,
                                 select_truncus, WriteShuffle256,
                                 truncstore_us_vi32, masked_truncstore_us_vi32,
                                 X86vtruncus, X86vmtruncus>;

defm VPMOVDB : avx512_trunc_db<0x31, "vpmovdb", trunc, select_trunc,
                               WriteShuffle256, truncstorevi8,
                               masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
defm VPMOVSDB : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, select_truncs,
                                WriteShuffle256, truncstore_s_vi8,
                                masked_truncstore_s_vi8, X86vtruncs,
                                X86vmtruncs>;
defm VPMOVUSDB : avx512_trunc_db<0x11, "vpmovusdb", X86vtruncus,
                                 select_truncus, WriteShuffle256,
                                 truncstore_us_vi8, masked_truncstore_us_vi8,
                                 X86vtruncus, X86vmtruncus>;

defm VPMOVDW : avx512_trunc_dw<0x33, "vpmovdw", trunc, select_trunc,
                               WriteShuffle256, truncstorevi16,
                               masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
defm VPMOVSDW : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, select_truncs,
                                WriteShuffle256, truncstore_s_vi16,
                                masked_truncstore_s_vi16, X86vtruncs,
                                X86vmtruncs>;
defm VPMOVUSDW : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus,
                                 select_truncus, WriteShuffle256,
                                 truncstore_us_vi16, masked_truncstore_us_vi16,
                                 X86vtruncus, X86vmtruncus>;

defm VPMOVWB : avx512_trunc_wb<0x30, "vpmovwb", trunc, select_trunc,
                               WriteShuffle256, truncstorevi8,
                               masked_truncstorevi8, X86vtrunc,
                               X86vmtrunc>;
defm VPMOVSWB : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, select_truncs,
                                WriteShuffle256, truncstore_s_vi8,
                                masked_truncstore_s_vi8, X86vtruncs,
                                X86vmtruncs>;
defm VPMOVUSWB : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus,
                                 select_truncus, WriteShuffle256,
                                 truncstore_us_vi8, masked_truncstore_us_vi8,
                                 X86vtruncus, X86vmtruncus>;

let Predicates = [HasAVX512, NoVLX] in {
def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))),
         (v8i16 (EXTRACT_SUBREG
                 (v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                              VR256X:$src, sub_ymm)))), sub_xmm))>;
def: Pat<(v4i32 (trunc (v4i64 VR256X:$src))),
         (v4i32 (EXTRACT_SUBREG
                 (v8i32 (VPMOVQDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                            VR256X:$src, sub_ymm)))), sub_xmm))>;
}

let Predicates = [HasBWI, NoVLX] in {
def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
         (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF),
                                             VR256X:$src, sub_ymm))), sub_xmm))>;
}
// Without BWI we can't use vXi16/vXi8 vselect, so we have to use vmtrunc nodes.
multiclass mtrunc_lowering<string InstrName, SDNode OpNode,
                           X86VectorVTInfo DestInfo,
                           X86VectorVTInfo SrcInfo> {
  def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
                                 DestInfo.RC:$src0,
                                 SrcInfo.KRCWM:$mask)),
            (!cast<Instruction>(InstrName#"rrk") DestInfo.RC:$src0,
                                                 SrcInfo.KRCWM:$mask,
                                                 SrcInfo.RC:$src)>;

  def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
                                 DestInfo.ImmAllZerosV,
                                 SrcInfo.KRCWM:$mask)),
            (!cast<Instruction>(InstrName#"rrkz") SrcInfo.KRCWM:$mask,
                                                  SrcInfo.RC:$src)>;
}

let Predicates = [HasVLX] in {
defm : mtrunc_lowering<"VPMOVDWZ256", X86vmtrunc, v8i16x_info, v8i32x_info>;
defm : mtrunc_lowering<"VPMOVSDWZ256", X86vmtruncs, v8i16x_info, v8i32x_info>;
defm : mtrunc_lowering<"VPMOVUSDWZ256", X86vmtruncus, v8i16x_info, v8i32x_info>;
}

let Predicates = [HasAVX512] in {
defm : mtrunc_lowering<"VPMOVDWZ", X86vmtrunc, v16i16x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVSDWZ", X86vmtruncs, v16i16x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVUSDWZ", X86vmtruncus, v16i16x_info, v16i32_info>;

defm : mtrunc_lowering<"VPMOVDBZ", X86vmtrunc, v16i8x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVSDBZ", X86vmtruncs, v16i8x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVUSDBZ", X86vmtruncus, v16i8x_info, v16i32_info>;

defm : mtrunc_lowering<"VPMOVQWZ", X86vmtrunc, v8i16x_info, v8i64_info>;
defm : mtrunc_lowering<"VPMOVSQWZ", X86vmtruncs, v8i16x_info, v8i64_info>;
defm : mtrunc_lowering<"VPMOVUSQWZ", X86vmtruncus, v8i16x_info, v8i64_info>;
}

multiclass WriteShuffle256_common<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
                                  X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
                                  X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode>{
  let ExeDomain = DestInfo.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
                    (ins SrcInfo.RC:$src), OpcodeStr, "$src", "$src",
                    (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src)))>,
                  EVEX, Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
                  (ins x86memop:$src), OpcodeStr, "$src", "$src",
                  (DestInfo.VT (LdFrag addr:$src))>,
                EVEX, Sched<[sched.Folded]>;
  }
}

multiclass WriteShuffle256_BW<bits<8> opc, string OpcodeStr,
          SDNode OpNode, SDNode InVecNode, string ExtTy,
          X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasBWI] in {
    defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v8i16x_info,
                                      v16i8x_info, i64mem, LdFrag, InVecNode>,
                                      EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v16i16x_info,
                                      v16i8x_info, i128mem, LdFrag, OpNode>,
                                      EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasBWI] in {
    defm Z   : WriteShuffle256_common<opc, OpcodeStr, sched, v32i16_info,
                                      v32i8x_info, i256mem, LdFrag, OpNode>,
                                      EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
  }
}
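// Editorial note: the Z128 variants above are instantiated with InVecNode
// (zext_invec/sext_invec) because only the low half of the 128-bit source
// participates, while the Z256/Z variants consume a full source vector and
// therefore use the plain OpNode (zext/sext).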
multiclass WriteShuffle256_BD<bits<8> opc, string OpcodeStr,
          SDNode OpNode, SDNode InVecNode, string ExtTy,
          X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v4i32x_info,
                                      v16i8x_info, i32mem, LdFrag, InVecNode>,
                                      EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info,
                                      v16i8x_info, i64mem, LdFrag, InVecNode>,
                                      EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z   : WriteShuffle256_common<opc, OpcodeStr, sched, v16i32_info,
                                      v16i8x_info, i128mem, LdFrag, OpNode>,
                                      EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
  }
}

multiclass WriteShuffle256_BQ<bits<8> opc, string OpcodeStr,
          SDNode OpNode, SDNode InVecNode, string ExtTy,
          X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
                                      v16i8x_info, i16mem, LdFrag, InVecNode>,
                                      EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
                                      v16i8x_info, i32mem, LdFrag, InVecNode>,
                                      EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z   : WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
                                      v16i8x_info, i64mem, LdFrag, InVecNode>,
                                      EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512, VEX_WIG;
  }
}

multiclass WriteShuffle256_WD<bits<8> opc, string OpcodeStr,
          SDNode OpNode, SDNode InVecNode, string ExtTy,
          X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v4i32x_info,
                                      v8i16x_info, i64mem, LdFrag, InVecNode>,
                                      EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info,
                                      v8i16x_info, i128mem, LdFrag, OpNode>,
                                      EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z   : WriteShuffle256_common<opc, OpcodeStr, sched, v16i32_info,
                                      v16i16x_info, i256mem, LdFrag, OpNode>,
                                      EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
  }
}

multiclass WriteShuffle256_WQ<bits<8> opc, string OpcodeStr,
          SDNode OpNode, SDNode InVecNode, string ExtTy,
          X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
                                      v8i16x_info, i32mem, LdFrag, InVecNode>,
                                      EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
                                      v8i16x_info, i64mem, LdFrag, InVecNode>,
                                      EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z   : WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
                                      v8i16x_info, i128mem, LdFrag, OpNode>,
                                      EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
  }
}
multiclass WriteShuffle256_DQ<bits<8> opc, string OpcodeStr,
          SDNode OpNode, SDNode InVecNode, string ExtTy,
          X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {

  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
                                      v4i32x_info, i64mem, LdFrag, InVecNode>,
                                      EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128;

    defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
                                      v4i32x_info, i128mem, LdFrag, OpNode>,
                                      EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256;
  }
  let Predicates = [HasAVX512] in {
    defm Z   : WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
                                      v8i32x_info, i256mem, LdFrag, OpNode>,
                                      EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512;
  }
}

defm VPMOVZXBW : WriteShuffle256_BW<0x30, "vpmovzxbw", zext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXBD : WriteShuffle256_BD<0x31, "vpmovzxbd", zext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXBQ : WriteShuffle256_BQ<0x32, "vpmovzxbq", zext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXWD : WriteShuffle256_WD<0x33, "vpmovzxwd", zext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXWQ : WriteShuffle256_WQ<0x34, "vpmovzxwq", zext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXDQ : WriteShuffle256_DQ<0x35, "vpmovzxdq", zext, zext_invec, "z", WriteShuffle256>;

defm VPMOVSXBW: WriteShuffle256_BW<0x20, "vpmovsxbw", sext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXBD: WriteShuffle256_BD<0x21, "vpmovsxbd", sext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXBQ: WriteShuffle256_BQ<0x22, "vpmovsxbq", sext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXWD: WriteShuffle256_WD<0x23, "vpmovsxwd", sext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXWQ: WriteShuffle256_WQ<0x24, "vpmovsxwq", sext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXDQ: WriteShuffle256_DQ<0x25, "vpmovsxdq", sext, sext_invec, "s", WriteShuffle256>;
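// Illustrative intrinsic mapping for the 512-bit forms (editorial note,
// not exhaustive):
//   _mm512_cvtepu8_epi16(a)  -> vpmovzxbw  (VPMOVZXBWZ)
//   _mm512_cvtepi8_epi16(a)  -> vpmovsxbw  (VPMOVSXBWZ)
//   _mm512_cvtepu16_epi32(a) -> vpmovzxwd  (VPMOVZXWDZ)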
// Patterns for which we also need any-extend versions; aext_vector_inreg
// is currently legalized to zext_vector_inreg.
multiclass AVX512_pmovx_patterns_base<string OpcPrefix, SDNode ExtOp> {
  // 256-bit patterns
  let Predicates = [HasVLX, HasBWI] in {
  def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))),
            (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
  }

  let Predicates = [HasVLX] in {
  def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))),
            (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;

  def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
            (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
  }

  // 512-bit patterns
  let Predicates = [HasBWI] in {
  def : Pat<(v32i16 (ExtOp (loadv32i8 addr:$src))),
            (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
  }
  let Predicates = [HasAVX512] in {
  def : Pat<(v16i32 (ExtOp (loadv16i8 addr:$src))),
            (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;
  def : Pat<(v16i32 (ExtOp (loadv16i16 addr:$src))),
            (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;

  def : Pat<(v8i64 (ExtOp (loadv8i16 addr:$src))),
            (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;

  def : Pat<(v8i64 (ExtOp (loadv8i32 addr:$src))),
            (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
  }
}

multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
                                 SDNode InVecOp> :
    AVX512_pmovx_patterns_base<OpcPrefix, ExtOp> {
  // 128-bit patterns
  let Predicates = [HasVLX, HasBWI] in {
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  }
  let Predicates = [HasVLX] in {
  def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;

  def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;

  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;

  def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (X86vzload32 addr:$src))))),
            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;

  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  }
  let Predicates = [HasVLX] in {
  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;

  def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;

  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  }
  // 512-bit patterns
  let Predicates = [HasAVX512] in {
  def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
  }
}

defm : AVX512_pmovx_patterns<"VPMOVSX", sext, sext_invec>;
defm : AVX512_pmovx_patterns<"VPMOVZX", zext, zext_invec>;

// Without BWI we can't do a trunc from v16i16 to v16i8. DAG combine can merge
// ext+trunc aggressively, making it impossible to legalize the DAG to this
// pattern directly.
let Predicates = [HasAVX512, NoBWI] in {
def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
         (VPMOVDBZrr (v16i32 (VPMOVZXWDZrr VR256X:$src)))>;
def: Pat<(v16i8 (trunc (loadv16i16 addr:$src))),
         (VPMOVDBZrr (v16i32 (VPMOVZXWDZrm addr:$src)))>;
}

//===----------------------------------------------------------------------===//
// GATHER - SCATTER Operations

// FIXME: Improve scheduling of gather/scatter instructions.
multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         X86MemOperand memop, PatFrag GatherNode,
                         RegisterClass MaskRC = _.KRCWM> {
  let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
      ExeDomain = _.ExeDomain in
  def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, MaskRC:$mask_wb),
            (ins _.RC:$src1, MaskRC:$mask, memop:$src2),
            !strconcat(OpcodeStr#_.Suffix,
            "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
            [(set _.RC:$dst, MaskRC:$mask_wb,
              (GatherNode (_.VT _.RC:$src1), MaskRC:$mask,
                          vectoraddr:$src2))]>, EVEX, EVEX_K,
            EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteLoad]>;
}

multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
                              AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512,
                                       vy512xmem, mgatherv8i32>, EVEX_V512, VEX_W;
  defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info512,
                                       vz512mem, mgatherv8i64>, EVEX_V512, VEX_W;
let Predicates = [HasVLX] in {
  defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
                                          vx256xmem, mgatherv4i32>, EVEX_V256, VEX_W;
  defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info256,
                                          vy256xmem, mgatherv4i64>, EVEX_V256, VEX_W;
  defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
                                          vx128xmem, mgatherv4i32>, EVEX_V128, VEX_W;
  defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
                                          vx128xmem, mgatherv2i64>, EVEX_V128, VEX_W;
}
}

multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
                              AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512, vz512mem,
                                       mgatherv16i32>, EVEX_V512;
  defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info256, vz256mem,
                                       mgatherv8i64>, EVEX_V512;
let Predicates = [HasVLX] in {
  defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
                                          vy256xmem, mgatherv8i32>, EVEX_V256;
  defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info128,
                                          vy128xmem, mgatherv4i64>, EVEX_V256;
  defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
                                          vx128xmem, mgatherv4i32>, EVEX_V128;
  defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
                                          vx64xmem, mgatherv2i64, VK2WM>,
                                          EVEX_V128;
}
}

defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">,
               avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">;

defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">,
                avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;
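// Usage sketch (editorial): a dword-indexed float gather selects the masked
// form defined above; note that the mask is also an output ($mask_wb) and is
// cleared element by element as loads complete, e.g.
//   __m512 r = _mm512_mask_i32gather_ps(src, k, idx, base, 4);
//   // vgatherdps (%rdi,%zmm1,4), %zmm0 {%k1}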
multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                          X86MemOperand memop, PatFrag ScatterNode,
                          RegisterClass MaskRC = _.KRCWM> {

let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain in

  def mr : AVX5128I<opc, MRMDestMem, (outs MaskRC:$mask_wb),
            (ins memop:$dst, MaskRC:$mask, _.RC:$src),
            !strconcat(OpcodeStr#_.Suffix,
            "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
            [(set MaskRC:$mask_wb, (ScatterNode (_.VT _.RC:$src),
                                    MaskRC:$mask, vectoraddr:$dst))]>,
            EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
            Sched<[WriteStore]>;
}

multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
                               AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512,
                                        vy512xmem, mscatterv8i32>, EVEX_V512, VEX_W;
  defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info512,
                                        vz512mem, mscatterv8i64>, EVEX_V512, VEX_W;
let Predicates = [HasVLX] in {
  defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
                                           vx256xmem, mscatterv4i32>, EVEX_V256, VEX_W;
  defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info256,
                                           vy256xmem, mscatterv4i64>, EVEX_V256, VEX_W;
  defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
                                           vx128xmem, mscatterv4i32>, EVEX_V128, VEX_W;
  defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
                                           vx128xmem, mscatterv2i64>, EVEX_V128, VEX_W;
}
}

multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
                               AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512, vz512mem,
                                        mscatterv16i32>, EVEX_V512;
  defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info256, vz256mem,
                                        mscatterv8i64>, EVEX_V512;
let Predicates = [HasVLX] in {
  defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
                                           vy256xmem, mscatterv8i32>, EVEX_V256;
  defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
                                           vy128xmem, mscatterv4i64>, EVEX_V256;
  defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
                                           vx128xmem, mscatterv4i32>, EVEX_V128;
  defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
                                           vx64xmem, mscatterv2i64, VK2WM>,
                                           EVEX_V128;
}
}

defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">,
                avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">;

defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">,
                 avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">;
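// Usage sketch (editorial): scatters mirror the gathers, with the mask
// likewise written back as elements are stored, e.g.
//   _mm512_i32scatter_ps(base, idx, v, 4);
//   // vscatterdps %zmm0, (%rdi,%zmm1,4) {%k1}
//   // (an all-ones mask is materialized for the unmasked intrinsic)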
// prefetch
multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
                                          RegisterClass KRC, X86MemOperand memop> {
  let Predicates = [HasPFI], mayLoad = 1, mayStore = 1 in
  def m : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
            !strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"), []>,
            EVEX, EVEX_K, Sched<[WriteLoad]>;
}

defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
                      VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
                      VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
                      VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
                      VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
                      VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
                      VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
                      VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
                      VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
                       VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
                       VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
                       VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
                       VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
                       VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
                       VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
                       VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
                       VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr> {
def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
                  !strconcat(OpcodeStr##Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
                  [(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))]>,
                  EVEX, Sched<[WriteMove]>; // TODO - WriteVecTrunc?

// Also need a pattern for anyextend.
def : Pat<(Vec.VT (anyext Vec.KRC:$src)),
          (!cast<Instruction>(NAME#"rr") Vec.KRC:$src)>;
}

multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
                                 string OpcodeStr, Predicate prd> {
let Predicates = [prd] in
  defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr>, EVEX_V256;
    defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr>, EVEX_V128;
  }
}

defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2", HasBWI>;
defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI>, VEX_W;
defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>;
defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI>, VEX_W;
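// Illustrative mapping (editorial note): vpmovm2* materializes a mask
// register as a vector of all-ones/all-zeros elements, e.g.
//   _mm512_movm_epi8(k)  -> vpmovm2b %k0, %zmm0  (AVX512BW)
//   _mm512_movm_epi32(k) -> vpmovm2d %k0, %zmm0  (AVX512DQ)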
multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr> {
  def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                      [(set _.KRC:$dst, (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src)))]>,
                      EVEX, Sched<[WriteMove]>;
}

// Use the 512-bit version to implement the 128/256-bit versions when VLX is
// not available.
multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo,
                                           X86VectorVTInfo _,
                                           string Name> {

  def : Pat<(_.KVT (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src))),
            (_.KVT (COPY_TO_REGCLASS
                     (!cast<Instruction>(Name#"Zrr")
                       (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                                      _.RC:$src, _.SubRegIdx)),
                     _.KRC))>;
}

multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
                                         AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
    defm Z : convert_vector_to_mask_common<opc, VTInfo.info512, OpcodeStr>,
             EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256, OpcodeStr>,
                EVEX_V256;
    defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,
                EVEX_V128;
  }
  let Predicates = [prd, NoVLX] in {
    defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info256, NAME>;
    defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info128, NAME>;
  }
}

defm VPMOVB2M : avx512_convert_vector_to_mask<0x29, "vpmovb2m",
                                              avx512vl_i8_info, HasBWI>;
defm VPMOVW2M : avx512_convert_vector_to_mask<0x29, "vpmovw2m",
                                              avx512vl_i16_info, HasBWI>, VEX_W;
defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m",
                                              avx512vl_i32_info, HasDQI>;
defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
                                              avx512vl_i64_info, HasDQI>, VEX_W;

// Patterns for handling sext from a mask register to v16i8/v16i16 when DQI
// is available, but BWI is not. We can't handle this in lowering because
// a target-independent DAG combine likes to combine sext and trunc.
let Predicates = [HasDQI, NoBWI] in {
  def : Pat<(v16i8 (sext (v16i1 VK16:$src))),
            (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
  def : Pat<(v16i16 (sext (v16i1 VK16:$src))),
            (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;

  def : Pat<(v16i8 (anyext (v16i1 VK16:$src))),
            (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
  def : Pat<(v16i16 (anyext (v16i1 VK16:$src))),
            (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
}

let Predicates = [HasDQI, NoBWI, HasVLX] in {
  def : Pat<(v8i16 (sext (v8i1 VK8:$src))),
            (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>;

  def : Pat<(v8i16 (anyext (v8i1 VK8:$src))),
            (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - COMPRESS and EXPAND
//

multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
                                        string OpcodeStr, X86FoldableSchedWrite sched> {
  defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
              (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
              (null_frag)>, AVX5128IBase,
              Sched<[sched]>;

  let mayStore = 1, hasSideEffects = 0 in
  def mr : AVX5128I<opc, MRMDestMem, (outs),
              (ins _.MemOp:$dst, _.RC:$src),
              OpcodeStr # "\t{$src, $dst|$dst, $src}",
              []>, EVEX_CD8<_.EltSize, CD8VT1>,
              Sched<[sched.Folded]>;

  def mrk : AVX5128I<opc, MRMDestMem, (outs),
              (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
              OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
              []>,
              EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
              Sched<[sched.Folded]>;
}

multiclass compress_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
  def : Pat<(X86mCompressingStore (_.VT _.RC:$src), addr:$dst, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix##mrk)
                                addr:$dst, _.KRCWM:$mask, _.RC:$src)>;

  def : Pat<(X86compress (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix##rrk)
                                _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
  def : Pat<(X86compress (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix##rrkz)
                                _.KRCWM:$mask, _.RC:$src)>;
}
multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched,
                                 AVX512VLVectorVTInfo VTInfo,
                                 Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in
  defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr, sched>,
           compress_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [Pred, HasVLX] in {
    defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr, sched>,
                compress_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr, sched>,
                compress_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
  }
}

// FIXME: Is there a better scheduler class for VPCOMPRESS?
defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", WriteVarShuffle256,
                                          avx512vl_i32_info>, EVEX, NotMemoryFoldable;
defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", WriteVarShuffle256,
                                          avx512vl_i64_info>, EVEX, VEX_W, NotMemoryFoldable;
defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", WriteVarShuffle256,
                                          avx512vl_f32_info>, EVEX, NotMemoryFoldable;
defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", WriteVarShuffle256,
                                          avx512vl_f64_info>, EVEX, VEX_W, NotMemoryFoldable;
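// Usage sketch (editorial): compress packs the elements selected by the mask
// toward the low end of the destination, or contiguously to memory, e.g.
//   __m512 r = _mm512_mask_compress_ps(src, k, a);  // vcompressps %zmm1, %zmm0 {%k1}
//   _mm512_mask_compressstoreu_ps(p, k, a);         // vcompressps %zmm0, (%rdi) {%k1}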
// expand
multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
                               string OpcodeStr, X86FoldableSchedWrite sched> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
              (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
              (null_frag)>, AVX5128IBase,
              Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
              (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1",
              (null_frag)>,
              AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass expand_by_vec_width_lowering<X86VectorVTInfo _, string Name> {

  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)),
            (!cast<Instruction>(Name#_.ZSuffix##rmkz)
                                _.KRCWM:$mask, addr:$src)>;

  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix##rmkz)
                                _.KRCWM:$mask, addr:$src)>;

  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask,
                                     (_.VT _.RC:$src0))),
            (!cast<Instruction>(Name#_.ZSuffix##rmk)
                                _.RC:$src0, _.KRCWM:$mask, addr:$src)>;

  def : Pat<(X86expand (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix##rrk)
                                _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
  def : Pat<(X86expand (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix##rrkz)
                                _.KRCWM:$mask, _.RC:$src)>;
}

multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
                               X86FoldableSchedWrite sched,
                               AVX512VLVectorVTInfo VTInfo,
                               Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in
  defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr, sched>,
           expand_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [Pred, HasVLX] in {
    defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr, sched>,
                expand_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr, sched>,
                expand_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
  }
}

// FIXME: Is there a better scheduler class for VPEXPAND?
defm VPEXPANDD : expand_by_elt_width <0x89, "vpexpandd", WriteVarShuffle256,
                                      avx512vl_i32_info>, EVEX;
defm VPEXPANDQ : expand_by_elt_width <0x89, "vpexpandq", WriteVarShuffle256,
                                      avx512vl_i64_info>, EVEX, VEX_W;
defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", WriteVarShuffle256,
                                      avx512vl_f32_info>, EVEX;
defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", WriteVarShuffle256,
                                      avx512vl_f64_info>, EVEX, VEX_W;
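// Usage sketch (editorial): expand is the inverse of compress; consecutive
// source elements are distributed to the lanes selected by the mask, e.g.
//   __m512 r = _mm512_mask_expandloadu_ps(src, k, p);  // vexpandps (%rdi), %zmm0 {%k1}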
// Handle instructions of the form:
//   reg_vec1 = op(reg_vec, imm)
//              op(mem_vec, imm)
//              op(broadcast(eltVt), imm)
// All instructions are created with FROUND_CURRENT.
multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                      X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
                      (OpNode (_.VT _.RC:$src1),
                              (i32 timm:$src2))>, Sched<[sched]>;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.MemOp:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
                    (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
                            (i32 timm:$src2))>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix, "$src2, ${src1}"##_.BroadcastStr,
                    "${src1}"##_.BroadcastStr##", $src2",
                    (OpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
                            (i32 timm:$src2))>, EVEX_B,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Handle instructions of the form:
//   reg_vec1 = op(reg_vec2, reg_vec3, imm), {sae}
multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
                                          SDNode OpNode, X86FoldableSchedWrite sched,
                                          X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix, "$src2, {sae}, $src1",
                      "$src1, {sae}, $src2",
                      (OpNode (_.VT _.RC:$src1),
                              (i32 timm:$src2))>,
                      EVEX_B, Sched<[sched]>;
}

multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
            AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
            SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{
  let Predicates = [prd] in {
    defm Z    : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM,
                                           _.info512>,
                avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE,
                                               sched.ZMM, _.info512>, EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM,
                                           _.info128>, EVEX_V128;
    defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM,
                                           _.info256>, EVEX_V256;
  }
}

// Handle instructions of the form:
//   reg_vec1 = op(reg_vec2, reg_vec3, imm)
//              op(reg_vec2, mem_vec, imm)
//              op(reg_vec2, broadcast(eltVt), imm)
// All instructions are created with FROUND_CURRENT.
multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  let ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 timm:$src3))>,
                      Sched<[sched]>;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
                    OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT (bitconvert (_.LdFrag addr:$src2))),
                            (i32 timm:$src3))>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
                    OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
                    "$src1, ${src2}"##_.BroadcastStr##", $src3",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT (_.BroadcastLdFrag addr:$src2)),
                            (i32 timm:$src3))>, EVEX_B,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Handle instructions of the form:
//   reg_vec1 = op(reg_vec2, reg_vec3, imm)
//              op(reg_vec2, mem_vec, imm)
multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo DestInfo,
                              X86VectorVTInfo SrcInfo>{
  let ExeDomain = DestInfo.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
                  (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
                                       (SrcInfo.VT SrcInfo.RC:$src2),
                                       (i8 timm:$src3)))>,
                  Sched<[sched]>;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
                (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
                                     (SrcInfo.VT (bitconvert
                                                  (SrcInfo.LdFrag addr:$src2))),
                                     (i8 timm:$src3)))>,
                Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Handle instructions of the form:
//   reg_vec1 = op(reg_vec2, reg_vec3, imm)
//              op(reg_vec2, mem_vec, imm)
//              op(reg_vec2, broadcast(eltVt), imm)
multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, X86VectorVTInfo _>:
  avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, sched, _, _>{

  let ExeDomain = _.ExeDomain in
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
                "$src1, ${src2}"##_.BroadcastStr##", $src3",
                (OpNode (_.VT _.RC:$src1),
                        (_.VT (_.BroadcastLdFrag addr:$src2)),
                        (i8 timm:$src3))>, EVEX_B,
                Sched<[sched.Folded, sched.ReadAfterFold]>;
}
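// Editorial note: the rri/rmi/rmbi triples above correspond to the three
// assembly forms, illustrated here with vgetmantps:
//   vgetmantps $4, %zmm1, %zmm0          (rri:  register source)
//   vgetmantps $4, (%rdi), %zmm0         (rmi:  memory source)
//   vgetmantps $4, (%rdi){1to16}, %zmm0  (rmbi: broadcast source)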
// Handle scalar instructions of the form:
//   reg_vec1 = op(reg_vec2, reg_vec3, imm)
//              op(reg_vec2, mem_scalar, imm)
multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 timm:$src3))>,
                      Sched<[sched]>;
  defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
                    OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT _.ScalarIntMemCPat:$src2),
                            (i32 timm:$src3))>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Handle instructions of the form:
//   reg_vec1 = op(reg_vec2, reg_vec3, imm), {sae}
multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode, X86FoldableSchedWrite sched,
                                    X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, {sae}, $src2, $src1",
                      "$src1, $src2, {sae}, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 timm:$src3))>,
                      EVEX_B, Sched<[sched]>;
}

// Handle scalar instructions of the form:
//   reg_vec1 = op(reg_vec2, reg_vec3, imm), {sae}
multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                    X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, {sae}, $src2, $src1",
                      "$src1, $src2, {sae}, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 timm:$src3))>,
                      EVEX_B, Sched<[sched]>;
}

multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
            AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
            SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{
  let Predicates = [prd] in {
    defm Z    : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
                avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE, sched.ZMM, _.info512>,
                EVEX_V512;

  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
                EVEX_V128;
    defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
                EVEX_V256;
  }
}

multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
                   X86SchedWriteWidths sched, AVX512VLVectorVTInfo DestInfo,
                   AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> {
  let Predicates = [Pred] in {
    defm Z    : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.ZMM, DestInfo.info512,
                                   SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX_4V;
  }
  let Predicates = [Pred, HasVLX] in {
    defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.XMM, DestInfo.info128,
                                   SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX_4V;
    defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.YMM, DestInfo.info256,
                                   SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX_4V;
  }
}
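// Editorial note on the {sae} forms above: setting EVEX.b on a
// register-register FP operation selects "suppress all exceptions", so no
// MXCSR exception flags are updated and no FP exceptions are raised, e.g.
//   vrangeps $4, {sae}, %zmm2, %zmm1, %zmm0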
multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
                                  bits<8> opc, SDNode OpNode, X86SchedWriteWidths sched,
                                  Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in {
    defm Z : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
             EVEX_V512;
  }
  let Predicates = [Pred, HasVLX] in {
    defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
                EVEX_V128;
    defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
                EVEX_V256;
  }
}

multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
                  X86VectorVTInfo _, bits<8> opc, SDNode OpNode,
                  SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, sched.XMM, _>,
             avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeSAE, sched.XMM, _>;
  }
}

multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
                  bits<8> opcPs, bits<8> opcPd, SDNode OpNode,
                  SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{
  defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info,
                            opcPs, OpNode, OpNodeSAE, sched, prd>,
                            EVEX_CD8<32, CD8VF>;
  defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info,
                            opcPd, OpNode, OpNodeSAE, sched, prd>,
                            EVEX_CD8<64, CD8VF>, VEX_W;
}

defm VREDUCE   : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
                              X86VReduce, X86VReduceSAE, SchedWriteFRnd, HasDQI>,
                              AVX512AIi8Base, EVEX;
defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
                              X86VRndScale, X86VRndScaleSAE, SchedWriteFRnd, HasAVX512>,
                              AVX512AIi8Base, EVEX;
defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
                              X86VGetMant, X86VGetMantSAE, SchedWriteFRnd, HasAVX512>,
                              AVX512AIi8Base, EVEX;

defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
                                                0x50, X86VRange, X86VRangeSAE,
                                                SchedWriteFAdd, HasDQI>,
      AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
                                                0x50, X86VRange, X86VRangeSAE,
                                                SchedWriteFAdd, HasDQI>,
      AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;

defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd",
      f64x_info, 0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
      0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;

defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
      0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
      0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;

defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
      0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
      0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
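// Immediate sketch for vrndscale (editorial, per the ISA definition): the
// result is 2^-M * round(2^M * src) with M = imm8[7:4], and imm8[1:0]
// selects the rounding mode, so e.g. _mm512_roundscale_ps(a, 0x01) rounds
// each element down to an integer (M = 0, mode = toward -infinity).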
multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
                                          X86FoldableSchedWrite sched,
                                          X86VectorVTInfo _,
                                          X86VectorVTInfo CastInfo,
                                          string EVEX2VEXOvrd> {
  let ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (_.VT (bitconvert
                         (CastInfo.VT (X86Shuf128 _.RC:$src1, _.RC:$src2,
                                                  (i8 timm:$src3)))))>,
                  Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                (_.VT
                 (bitconvert
                  (CastInfo.VT (X86Shuf128 _.RC:$src1,
                                           (CastInfo.LdFrag addr:$src2),
                                           (i8 timm:$src3)))))>,
                Sched<[sched.Folded, sched.ReadAfterFold]>,
                EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                    OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
                    "$src1, ${src2}"##_.BroadcastStr##", $src3",
                    (_.VT
                     (bitconvert
                      (CastInfo.VT
                       (X86Shuf128 _.RC:$src1,
                                   (_.BroadcastLdFrag addr:$src2),
                                   (i8 timm:$src3)))))>, EVEX_B,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_shuff_packed_128<string OpcodeStr, X86FoldableSchedWrite sched,
                                   AVX512VLVectorVTInfo _,
                                   AVX512VLVectorVTInfo CastInfo, bits<8> opc,
                                   string EVEX2VEXOvrd>{
  let Predicates = [HasAVX512] in
  defm Z : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
                                          _.info512, CastInfo.info512, "">, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in
  defm Z256 : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
                                             _.info256, CastInfo.info256,
                                             EVEX2VEXOvrd>, EVEX_V256;
}

defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", WriteFShuffle256,
      avx512vl_f32_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", WriteFShuffle256,
      avx512vl_f64_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", WriteFShuffle256,
      avx512vl_i32_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", WriteFShuffle256,
      avx512vl_i64_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
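// Usage sketch (editorial): these shuffles move whole 128-bit lanes; the low
// half of the result is selected from $src1 and the high half from $src2,
// each lane picked by a 2-bit field of the immediate, e.g.
//   __m512 r = _mm512_shuffle_f32x4(a, b, 0x44);  // lanes a0, a1, b0, b1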
let Predicates = [HasAVX512] in {
// Provide fallback in case the load node that is used in the broadcast
// patterns above is used by additional users, which prevents the pattern
// selection.
def : Pat<(v8f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
          (VSHUFF64X2Zrri (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;
def : Pat<(v8i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
          (VSHUFI64X2Zrri (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;

def : Pat<(v16f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
          (VSHUFF32X4Zrri (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;
def : Pat<(v16i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
          (VSHUFI32X4Zrri (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;

def : Pat<(v32i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
          (VSHUFI32X4Zrri (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;

def : Pat<(v64i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
          (VSHUFI32X4Zrri (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;
}

multiclass avx512_valign<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  // NOTE: EVEX2VEXOverride changed back to Unset for 256-bit at the
  // instantiation of this class.
  let ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (_.VT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 timm:$src3)))>,
                  Sched<[sched]>, EVEX2VEXOverride<"VPALIGNRrri">;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                (_.VT (X86VAlign _.RC:$src1,
                                 (bitconvert (_.LdFrag addr:$src2)),
                                 (i8 timm:$src3)))>,
                Sched<[sched.Folded, sched.ReadAfterFold]>,
                EVEX2VEXOverride<"VPALIGNRrmi">;

  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                   OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
                   "$src1, ${src2}"##_.BroadcastStr##", $src3",
                   (X86VAlign _.RC:$src1,
                              (_.VT (_.BroadcastLdFrag addr:$src2)),
                              (i8 timm:$src3))>, EVEX_B,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
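// Semantics sketch (editorial): valignd/valignq concatenate the two sources
// (first source supplying the high half, as in palignr) and shift right by
// imm elements, keeping the low elements, e.g.
//   __m512i r = _mm512_alignr_epi32(a, b, 3);
//   // valignd $3, %zmm_b, %zmm_a, %zmm_r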
    defm Z256 : avx512_valign<0x03, OpcodeStr, sched.YMM, _.info256>,
                AVX512AIi8Base, EVEX_4V, EVEX_V256;
  }
}

defm VALIGND: avx512_valign_common<"valignd", SchedWriteShuffle,
                                   avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VALIGNQ: avx512_valign_common<"valignq", SchedWriteShuffle,
                                   avx512vl_i64_info>, EVEX_CD8<64, CD8VF>,
                                   VEX_W;

defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr",
                                         SchedWriteShuffle, avx512vl_i8_info,
                                         avx512vl_i8_info>, EVEX_CD8<8, CD8VF>;

// Fragments to help convert valignq into masked valignd, or valignq/valignd
// into vpalignr. A shift by N 64-bit elements equals a shift by 2*N 32-bit
// elements or by 8*N (resp. 4*N) bytes, so the immediate is scaled
// accordingly.
def ValignqImm32XForm : SDNodeXForm<timm, [{
  return getI8Imm(N->getZExtValue() * 2, SDLoc(N));
}]>;
def ValignqImm8XForm : SDNodeXForm<timm, [{
  return getI8Imm(N->getZExtValue() * 8, SDLoc(N));
}]>;
def ValigndImm8XForm : SDNodeXForm<timm, [{
  return getI8Imm(N->getZExtValue() * 4, SDLoc(N));
}]>;

multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
                                        X86VectorVTInfo From, X86VectorVTInfo To,
                                        SDNodeXForm ImmXForm> {
  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                    (bitconvert
                     (From.VT (OpNode From.RC:$src1, From.RC:$src2,
                                      timm:$src3))),
                    To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask,
             To.RC:$src1, To.RC:$src2, (ImmXForm timm:$src3))>;

  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                    (bitconvert
                     (From.VT (OpNode From.RC:$src1, From.RC:$src2,
                                      timm:$src3))),
                    To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask,
             To.RC:$src1, To.RC:$src2, (ImmXForm timm:$src3))>;

  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                    (bitconvert
                     (From.VT (OpNode From.RC:$src1,
                                      (From.LdFrag addr:$src2),
                                      timm:$src3))),
                    To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask,
             To.RC:$src1, addr:$src2, (ImmXForm timm:$src3))>;

  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                    (bitconvert
                     (From.VT (OpNode From.RC:$src1,
                                      (From.LdFrag addr:$src2),
                                      timm:$src3))),
                    To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask,
             To.RC:$src1, addr:$src2, (ImmXForm timm:$src3))>;
}

multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
                                           X86VectorVTInfo From,
                                           X86VectorVTInfo To,
                                           SDNodeXForm ImmXForm> :
      avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> {
  def : Pat<(From.VT (OpNode From.RC:$src1,
                      (bitconvert (To.VT (To.BroadcastLdFrag addr:$src2))),
                      timm:$src3)),
            (!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2,
             (ImmXForm timm:$src3))>;

  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                    (bitconvert
                     (From.VT (OpNode From.RC:$src1,
                               (bitconvert
                                (To.VT (To.BroadcastLdFrag addr:$src2))),
                               timm:$src3))),
                    To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask,
             To.RC:$src1, addr:$src2, (ImmXForm timm:$src3))>;

  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                    (bitconvert
                     (From.VT (OpNode From.RC:$src1,
                               (bitconvert
                                (To.VT (To.BroadcastLdFrag addr:$src2))),
                               timm:$src3))),
                    To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask,
             To.RC:$src1, addr:$src2,
             (ImmXForm timm:$src3))>;
}

let Predicates = [HasAVX512] in {
  // For 512-bit we lower to the widest element type we can. So we only need
  // to handle converting valignq to valignd.
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ", X86VAlign, v8i64_info,
                                         v16i32_info, ValignqImm32XForm>;
}

let Predicates = [HasVLX] in {
  // For 128-bit we lower to the widest element type we can. So we only need
  // to handle converting valignq to valignd.
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ128", X86VAlign, v2i64x_info,
                                         v4i32x_info, ValignqImm32XForm>;
  // For 256-bit we lower to the widest element type we can. So we only need
  // to handle converting valignq to valignd.
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ256", X86VAlign, v4i64x_info,
                                         v8i32x_info, ValignqImm32XForm>;
}

let Predicates = [HasVLX, HasBWI] in {
  // We can turn 128- and 256-bit VALIGND/VALIGNQ into VPALIGNR.
  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v2i64x_info,
                                      v16i8x_info, ValignqImm8XForm>;
  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v4i32x_info,
                                      v16i8x_info, ValigndImm8XForm>;
}

defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw",
                SchedWritePSADBW, avx512vl_i16_info, avx512vl_i8_info>,
                EVEX_CD8<8, CD8VF>, NotEVEX2VEXConvertible;

multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                 (ins _.RC:$src1), OpcodeStr,
                 "$src1", "$src1",
                 (_.VT (OpNode (_.VT _.RC:$src1)))>, EVEX, AVX5128IBase,
                 Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                 (ins _.MemOp:$src1), OpcodeStr,
                 "$src1", "$src1",
                 (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1)))))>,
                 EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>,
                 Sched<[sched.Folded]>;
  }
}

multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _> :
           avx512_unary_rm<opc, OpcodeStr, OpNode, sched, _> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                 (ins _.ScalarMemOp:$src1), OpcodeStr,
                 "${src1}"##_.BroadcastStr,
                 "${src1}"##_.BroadcastStr,
                 (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src1))))>,
                 EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
                 Sched<[sched.Folded]>;
}

multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86SchedWriteWidths sched,
                              AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
           EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
                EVEX_V128;
  }
}

multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo,
                               Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
           EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
                EVEX_V128;
  }
}

multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd> {
  defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, sched,
                               avx512vl_i64_info, prd>, VEX_W;
  defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, sched,
                               avx512vl_i32_info, prd>;
}

multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd> {
  defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, sched,
                              avx512vl_i16_info, prd>, VEX_WIG;
  defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, sched,
                              avx512vl_i8_info, prd>, VEX_WIG;
}

multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
                                  bits<8> opc_d, bits<8> opc_q,
                                  string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched> {
  defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, sched,
                                    HasAVX512>,
              avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, sched,
                                    HasBWI>;
}

defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs,
                                    SchedWriteVecALU>;

// VPABS: Use the 512-bit version to implement 128/256-bit when VLX is not
// available.
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v4i64 (abs VR256X:$src)),
            (EXTRACT_SUBREG
             (VPABSQZrr
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
             sub_ymm)>;
  def : Pat<(v2i64 (abs VR128X:$src)),
            (EXTRACT_SUBREG
             (VPABSQZrr
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
             sub_xmm)>;
}

// Use the 512-bit version to implement 128/256-bit.
multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
                                 AVX512VLVectorVTInfo _, Predicate prd> {
  let Predicates = [prd, NoVLX] in {
    def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1))),
              (EXTRACT_SUBREG
               (!cast<Instruction>(InstrStr # "Zrr")
                (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)),
                               _.info256.RC:$src1,
                               _.info256.SubRegIdx)),
               _.info256.SubRegIdx)>;

    def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1))),
              (EXTRACT_SUBREG
               (!cast<Instruction>(InstrStr # "Zrr")
                (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)),
                               _.info128.RC:$src1,
                               _.info128.SubRegIdx)),
               _.info128.SubRegIdx)>;
  }
}

defm VPLZCNT : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz,
                                     SchedWriteVecIMul, HasCDI>;

// FIXME: Is there a better scheduler class for VPCONFLICT?
defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict,
                                        SchedWriteVecALU, HasCDI>;

// VPLZCNT: Use the 512-bit version to implement 128/256-bit when VLX is not
// available.
defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>;
defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>;

//===---------------------------------------------------------------------===//
// Counts number of ones - VPOPCNTD and VPOPCNTQ
//===---------------------------------------------------------------------===//

// FIXME: Is there a better scheduler class for VPOPCNTD/VPOPCNTQ?
defm VPOPCNT : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop,
                                     SchedWriteVecALU, HasVPOPCNTDQ>;

defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info, HasVPOPCNTDQ>;
defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info, HasVPOPCNTDQ>;

//===---------------------------------------------------------------------===//
// Replicate Single FP - MOVSHDUP and MOVSLDUP
//===---------------------------------------------------------------------===//

multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86SchedWriteWidths sched> {
  defm NAME: avx512_unary_rm_vl<opc, OpcodeStr, OpNode, sched,
                                avx512vl_f32_info, HasAVX512>, XS;
}

defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup,
                                  SchedWriteFShuffle>;
defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup,
                                  SchedWriteFShuffle>;

//===----------------------------------------------------------------------===//
// AVX-512 - MOVDDUP
//===----------------------------------------------------------------------===//

multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr,
                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                 (ins _.RC:$src), OpcodeStr, "$src", "$src",
                 (_.VT (X86VBroadcast (_.VT _.RC:$src)))>, EVEX,
                 Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                 (ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
                 (_.VT (_.BroadcastLdFrag addr:$src))>,
                 EVEX, EVEX_CD8<_.EltSize, CD8VH>,
                 Sched<[sched.Folded]>;
  }
}

multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo> {
  defm Z : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.ZMM,
                           VTInfo.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.YMM,
                                VTInfo.info256>, EVEX_V256;
    defm Z128 : avx512_movddup_128<opc, OpcodeStr, sched.XMM,
                                   VTInfo.info128>, EVEX_V128;
  }
}

multiclass avx512_movddup<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          X86SchedWriteWidths sched> {
  defm NAME: avx512_movddup_common<opc, OpcodeStr, OpNode, sched,
                                   avx512vl_f64_info>, XD, VEX_W;
}

defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup, SchedWriteFShuffle>;

let Predicates = [HasVLX] in {
def : Pat<(v2f64 (X86VBroadcast f64:$src)),
          (VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
def : Pat<(v2f64 (X86VBroadcast (v2f64 (simple_load addr:$src)))),
          (VMOVDDUPZ128rm addr:$src)>;
def : Pat<(v2f64 (X86VBroadcast (v2f64 (X86vzload64 addr:$src)))),
          (VMOVDDUPZ128rm addr:$src)>;

def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
                   (v2f64 VR128X:$src0)),
          (VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
                           (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
                   immAllZerosV),
          (VMOVDDUPZ128rrkz VK2WM:$mask, (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;

def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcastld64 addr:$src)),
                   (v2f64 VR128X:$src0)),
          (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcastld64 addr:$src)),
                   immAllZerosV),
          (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;

def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (v2f64 (simple_load addr:$src)))),
                   (v2f64 VR128X:$src0)),
          (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (v2f64 (simple_load addr:$src)))),
                   immAllZerosV),
          (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - Unpack Instructions
//===----------------------------------------------------------------------===//

defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, HasAVX512,
                                 SchedWriteFShuffleSizes, 0, 1>;
defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, HasAVX512,
                                 SchedWriteFShuffleSizes>;

defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
                                       SchedWriteShuffle, HasBWI>;
defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh,
                                       SchedWriteShuffle, HasBWI>;
defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl,
                                       SchedWriteShuffle, HasBWI>;
defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh,
                                       SchedWriteShuffle, HasBWI>;

defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl,
                                       SchedWriteShuffle, HasAVX512>;
defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh,
                                       SchedWriteShuffle, HasAVX512>;
defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl,
                                        SchedWriteShuffle, HasAVX512>;
defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh,
                                        SchedWriteShuffle, HasAVX512>;

//===----------------------------------------------------------------------===//
// AVX-512 - Extract & Insert Integer Instructions
//===----------------------------------------------------------------------===//

multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   X86VectorVTInfo _> {
  def mr : AVX512Ii8<opc, MRMDestMem, (outs),
              (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              [(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), imm:$src2))),
                      addr:$dst)]>,
              EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecExtractSt]>;
}

multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst),
                       (ins _.RC:$src1, u8imm:$src2),
                       OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set GR32orGR64:$dst,
                             (X86pextrb (_.VT _.RC:$src1), imm:$src2))]>,
                       EVEX, TAPD, Sched<[WriteVecExtract]>;

    defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD;
  }
}

multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
                       (ins _.RC:$src1, u8imm:$src2),
                       OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set GR32orGR64:$dst,
                             (X86pextrw (_.VT _.RC:$src1), imm:$src2))]>,
                       EVEX, PD, Sched<[WriteVecExtract]>;

    let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
    def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
                           (ins _.RC:$src1, u8imm:$src2),
                           OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
                           EVEX, TAPD, FoldGenData<NAME#rr>,
                           Sched<[WriteVecExtract]>;

    defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD;
  }
}

multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
                                 RegisterClass GRC> {
  let Predicates = [HasDQI] in {
    def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst),
                       (ins _.RC:$src1, u8imm:$src2),
                       OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set GRC:$dst,
                             (extractelt (_.VT _.RC:$src1), imm:$src2))]>,
                       EVEX, TAPD, Sched<[WriteVecExtract]>;

    def mr : AVX512Ii8<0x16, MRMDestMem, (outs),
                       (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
                       OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(store (extractelt (_.VT _.RC:$src1),
                                           imm:$src2), addr:$dst)]>,
                       EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD,
                       Sched<[WriteVecExtractSt]>;
  }
}

defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>, VEX_WIG;
defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>, VEX_WIG;
defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, VEX_W;

multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86VectorVTInfo _, PatFrag LdFrag> {
  def rm : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst),
      (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
      OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set _.RC:$dst,
            (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), imm:$src3)))]>,
      EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
      Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
}

multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86VectorVTInfo _, PatFrag LdFrag> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
        (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
        OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
        [(set _.RC:$dst,
              (OpNode _.RC:$src1, GR32orGR64:$src2, imm:$src3))]>, EVEX_4V,
        Sched<[WriteVecInsert]>;

    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag>;
  }
}

multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
                                X86VectorVTInfo _, RegisterClass GRC> {
  let Predicates = [HasDQI] in {
    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
        (ins _.RC:$src1, GRC:$src2, u8imm:$src3),
        OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
        [(set _.RC:$dst,
              (_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>,
        EVEX_4V, TAPD, Sched<[WriteVecInsert]>;

    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
                                    _.ScalarLdFrag>, TAPD;
  }
}

defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info,
                                     extloadi8>, TAPD, VEX_WIG;
defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info,
                                     extloadi16>, PD, VEX_WIG;
defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, VEX_W;

//===----------------------------------------------------------------------===//
// VSHUFPS - VSHUFPD Operations
//===----------------------------------------------------------------------===//

multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_I,
                        AVX512VLVectorVTInfo VTInfo_FP>{
  defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp,
                                    SchedWriteFShuffle>,
             EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
             AVX512AIi8Base, EVEX_4V;
}

defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_i32_info, avx512vl_f32_info>, PS;
defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_i64_info, avx512vl_f64_info>, PD, VEX_W;

//===----------------------------------------------------------------------===//
// AVX-512 - Byte shift Left/Right
//===----------------------------------------------------------------------===//

// FIXME: The SSE/AVX names are PSLLDQri etc. - should we add the i here as well?
multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
                               Format MRMm, string OpcodeStr,
                               X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  def rr : AVX512<opc, MRMr,
             (outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 timm:$src2))))]>,
             Sched<[sched]>;
  def rm : AVX512<opc, MRMm,
             (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.RC:$dst,(_.VT (OpNode
                                    (_.VT (bitconvert (_.LdFrag addr:$src1))),
                                    (i8 timm:$src2))))]>,
             Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
                                   Format MRMm, string OpcodeStr,
                                   X86SchedWriteWidths sched, Predicate prd>{
  let Predicates = [prd] in
  defm Z : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                               sched.ZMM, v64i8_info>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                    sched.YMM, v32i8x_info>, EVEX_V256;
    defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                    sched.XMM, v16i8x_info>, EVEX_V128;
  }
}
defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq",
                                       SchedWriteShuffle, HasBWI>,
                                       AVX512PDIi8Base, EVEX_4V, VEX_WIG;
defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq",
                                       SchedWriteShuffle, HasBWI>,
                                       AVX512PDIi8Base, EVEX_4V, VEX_WIG;

multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
                                string OpcodeStr, X86FoldableSchedWrite sched,
                                X86VectorVTInfo _dst, X86VectorVTInfo _src> {
  let isCommutable = 1 in
  def rr : AVX512BI<opc, MRMSrcReg,
             (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.RC:$src2),
             !strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 11100 [(set _dst.RC:$dst,(_dst.VT 11101 (OpNode (_src.VT _src.RC:$src1), 11102 (_src.VT _src.RC:$src2))))]>, 11103 Sched<[sched]>; 11104 def rm : AVX512BI<opc, MRMSrcMem, 11105 (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.MemOp:$src2), 11106 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 11107 [(set _dst.RC:$dst,(_dst.VT 11108 (OpNode (_src.VT _src.RC:$src1), 11109 (_src.VT (bitconvert 11110 (_src.LdFrag addr:$src2))))))]>, 11111 Sched<[sched.Folded, sched.ReadAfterFold]>; 11112} 11113 11114multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode, 11115 string OpcodeStr, X86SchedWriteWidths sched, 11116 Predicate prd> { 11117 let Predicates = [prd] in 11118 defm Z : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.ZMM, 11119 v8i64_info, v64i8_info>, EVEX_V512; 11120 let Predicates = [prd, HasVLX] in { 11121 defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.YMM, 11122 v4i64x_info, v32i8x_info>, EVEX_V256; 11123 defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.XMM, 11124 v2i64x_info, v16i8x_info>, EVEX_V128; 11125 } 11126} 11127 11128defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw", 11129 SchedWritePSADBW, HasBWI>, EVEX_4V, VEX_WIG; 11130 11131// Transforms to swizzle an immediate to enable better matching when 11132// memory operand isn't in the right place. 11133def VPTERNLOG321_imm8 : SDNodeXForm<timm, [{ 11134 // Convert a VPTERNLOG immediate by swapping operand 0 and operand 2. 11135 uint8_t Imm = N->getZExtValue(); 11136 // Swap bits 1/4 and 3/6. 11137 uint8_t NewImm = Imm & 0xa5; 11138 if (Imm & 0x02) NewImm |= 0x10; 11139 if (Imm & 0x10) NewImm |= 0x02; 11140 if (Imm & 0x08) NewImm |= 0x40; 11141 if (Imm & 0x40) NewImm |= 0x08; 11142 return getI8Imm(NewImm, SDLoc(N)); 11143}]>; 11144def VPTERNLOG213_imm8 : SDNodeXForm<timm, [{ 11145 // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2. 11146 uint8_t Imm = N->getZExtValue(); 11147 // Swap bits 2/4 and 3/5. 11148 uint8_t NewImm = Imm & 0xc3; 11149 if (Imm & 0x04) NewImm |= 0x10; 11150 if (Imm & 0x10) NewImm |= 0x04; 11151 if (Imm & 0x08) NewImm |= 0x20; 11152 if (Imm & 0x20) NewImm |= 0x08; 11153 return getI8Imm(NewImm, SDLoc(N)); 11154}]>; 11155def VPTERNLOG132_imm8 : SDNodeXForm<timm, [{ 11156 // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2. 11157 uint8_t Imm = N->getZExtValue(); 11158 // Swap bits 1/2 and 5/6. 11159 uint8_t NewImm = Imm & 0x99; 11160 if (Imm & 0x02) NewImm |= 0x04; 11161 if (Imm & 0x04) NewImm |= 0x02; 11162 if (Imm & 0x20) NewImm |= 0x40; 11163 if (Imm & 0x40) NewImm |= 0x20; 11164 return getI8Imm(NewImm, SDLoc(N)); 11165}]>; 11166def VPTERNLOG231_imm8 : SDNodeXForm<timm, [{ 11167 // Convert a VPTERNLOG immediate by moving operand 1 to the end. 11168 uint8_t Imm = N->getZExtValue(); 11169 // Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5 11170 uint8_t NewImm = Imm & 0x81; 11171 if (Imm & 0x02) NewImm |= 0x04; 11172 if (Imm & 0x04) NewImm |= 0x10; 11173 if (Imm & 0x08) NewImm |= 0x40; 11174 if (Imm & 0x10) NewImm |= 0x02; 11175 if (Imm & 0x20) NewImm |= 0x08; 11176 if (Imm & 0x40) NewImm |= 0x20; 11177 return getI8Imm(NewImm, SDLoc(N)); 11178}]>; 11179def VPTERNLOG312_imm8 : SDNodeXForm<timm, [{ 11180 // Convert a VPTERNLOG immediate by moving operand 2 to the beginning. 
def VPTERNLOG321_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by swapping operand 0 and operand 2.
  uint8_t Imm = N->getZExtValue();
  // Swap bits 1/4 and 3/6.
  uint8_t NewImm = Imm & 0xa5;
  if (Imm & 0x02) NewImm |= 0x10;
  if (Imm & 0x10) NewImm |= 0x02;
  if (Imm & 0x08) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG213_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by swapping operand 0 and operand 1.
  uint8_t Imm = N->getZExtValue();
  // Swap bits 2/4 and 3/5.
  uint8_t NewImm = Imm & 0xc3;
  if (Imm & 0x04) NewImm |= 0x10;
  if (Imm & 0x10) NewImm |= 0x04;
  if (Imm & 0x08) NewImm |= 0x20;
  if (Imm & 0x20) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG132_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2.
  uint8_t Imm = N->getZExtValue();
  // Swap bits 1/2 and 5/6.
  uint8_t NewImm = Imm & 0x99;
  if (Imm & 0x02) NewImm |= 0x04;
  if (Imm & 0x04) NewImm |= 0x02;
  if (Imm & 0x20) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x20;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG231_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by moving operand 1 to the end.
  uint8_t Imm = N->getZExtValue();
  // Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5
  uint8_t NewImm = Imm & 0x81;
  if (Imm & 0x02) NewImm |= 0x04;
  if (Imm & 0x04) NewImm |= 0x10;
  if (Imm & 0x08) NewImm |= 0x40;
  if (Imm & 0x10) NewImm |= 0x02;
  if (Imm & 0x20) NewImm |= 0x08;
  if (Imm & 0x40) NewImm |= 0x20;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG312_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by moving operand 2 to the beginning.
  uint8_t Imm = N->getZExtValue();
  // Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3
  uint8_t NewImm = Imm & 0x81;
  if (Imm & 0x02) NewImm |= 0x10;
  if (Imm & 0x04) NewImm |= 0x02;
  if (Imm & 0x08) NewImm |= 0x20;
  if (Imm & 0x10) NewImm |= 0x04;
  if (Imm & 0x20) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;

multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          X86FoldableSchedWrite sched, X86VectorVTInfo _,
                          string Name>{
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
                  OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
                  (OpNode (_.VT _.RC:$src1),
                          (_.VT _.RC:$src2),
                          (_.VT _.RC:$src3),
                          (i8 timm:$src4)), 1, 1>,
                  AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
  defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
                  OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
                  (OpNode (_.VT _.RC:$src1),
                          (_.VT _.RC:$src2),
                          (_.VT (bitconvert (_.LdFrag addr:$src3))),
                          (i8 timm:$src4)), 1, 0>,
                  AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
                  OpcodeStr, "$src4, ${src3}"##_.BroadcastStr##", $src2",
                  "$src2, ${src3}"##_.BroadcastStr##", $src4",
                  (OpNode (_.VT _.RC:$src1),
                          (_.VT _.RC:$src2),
                          (_.VT (_.BroadcastLdFrag addr:$src3)),
                          (i8 timm:$src4)), 1, 0>, EVEX_B,
                  AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
  } // Constraints = "$src1 = $dst"

  // Additional patterns for matching passthru operand in other positions.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 timm:$src4))>;

  // Additional patterns for matching loads in other positions.
  def : Pat<(_.VT (OpNode (bitconvert (_.LdFrag addr:$src3)),
                          _.RC:$src2, _.RC:$src1, (i8 timm:$src4))),
            (!cast<Instruction>(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
             addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (OpNode _.RC:$src1,
                          (bitconvert (_.LdFrag addr:$src3)),
                          _.RC:$src2, (i8 timm:$src4))),
            (!cast<Instruction>(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
             addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;

  // Additional patterns for matching zero masking with loads in other
  // positions.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
                           _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
                           _.RC:$src2, (i8 timm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;

  // Additional patterns for matching masked loads with different
  // operand orders.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
                           _.RC:$src2, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
                           _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1,
                           (bitconvert (_.LdFrag addr:$src3)), (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)),
                           _.RC:$src1, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
                           _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;

  // Additional patterns for matching broadcasts in other positions.
  def : Pat<(_.VT (OpNode (_.BroadcastLdFrag addr:$src3),
                          _.RC:$src2, _.RC:$src1, (i8 timm:$src4))),
            (!cast<Instruction>(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
             addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (OpNode _.RC:$src1,
                          (_.BroadcastLdFrag addr:$src3),
                          _.RC:$src2, (i8 timm:$src4))),
            (!cast<Instruction>(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
             addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;

  // Additional patterns for matching zero masking with broadcasts in other
  // positions.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (_.BroadcastLdFrag addr:$src3),
                           _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
             _.KRCWM:$mask, _.RC:$src2, addr:$src3,
             (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src1,
                           (_.BroadcastLdFrag addr:$src3),
                           _.RC:$src2, (i8 timm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
             _.KRCWM:$mask, _.RC:$src2, addr:$src3,
             (VPTERNLOG132_imm8 timm:$src4))>;

  // Additional patterns for matching masked broadcasts with different
  // operand orders.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src1, (_.BroadcastLdFrag addr:$src3),
                           _.RC:$src2, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (_.BroadcastLdFrag addr:$src3),
                           _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1,
                           (_.BroadcastLdFrag addr:$src3),
                           (i8 timm:$src4)), _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src2,
                           (_.BroadcastLdFrag addr:$src3),
                           _.RC:$src1, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (_.BroadcastLdFrag addr:$src3),
                           _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;
}

multiclass avx512_common_ternlog<string OpcodeStr, X86SchedWriteWidths sched,
                                 AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in
  defm Z    : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.ZMM,
                             _.info512, NAME>, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.XMM,
                               _.info128, NAME>, EVEX_V128;
    defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.YMM,
                               _.info256, NAME>, EVEX_V256;
  }
}

defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SchedWriteVecALU,
                                        avx512vl_i32_info>;
defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU,
                                        avx512vl_i64_info>, VEX_W;

// Patterns to use VPTERNLOG for vXi16/vXi8 vectors.
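// (vpternlog is a purely bitwise operation, so the Q forms compute the same
// result for any element width; only masking and embedded broadcasts depend
// on the element size, which is why the unmasked vXi8/vXi16 cases below can
// all funnel through VPTERNLOGQ.)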
let Predicates = [HasVLX] in {
  def : Pat<(v16i8 (X86vpternlog VR128X:$src1, VR128X:$src2, VR128X:$src3,
                                 (i8 timm:$src4))),
            (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3,
                               timm:$src4)>;
  def : Pat<(v16i8 (X86vpternlog VR128X:$src1, VR128X:$src2,
                                 (loadv16i8 addr:$src3), (i8 timm:$src4))),
            (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
                               timm:$src4)>;
  def : Pat<(v16i8 (X86vpternlog (loadv16i8 addr:$src3), VR128X:$src2,
                                 VR128X:$src1, (i8 timm:$src4))),
            (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
                               (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(v16i8 (X86vpternlog VR128X:$src1, (loadv16i8 addr:$src3),
                                 VR128X:$src2, (i8 timm:$src4))),
            (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
                               (VPTERNLOG132_imm8 timm:$src4))>;

  def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2, VR128X:$src3,
                                 (i8 timm:$src4))),
            (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3,
                               timm:$src4)>;
  def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2,
                                 (loadv8i16 addr:$src3), (i8 timm:$src4))),
            (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
                               timm:$src4)>;
  def : Pat<(v8i16 (X86vpternlog (loadv8i16 addr:$src3), VR128X:$src2,
                                 VR128X:$src1, (i8 timm:$src4))),
            (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
                               (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(v8i16 (X86vpternlog VR128X:$src1, (loadv8i16 addr:$src3),
                                 VR128X:$src2, (i8 timm:$src4))),
            (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
                               (VPTERNLOG132_imm8 timm:$src4))>;

  def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2, VR256X:$src3,
                                 (i8 timm:$src4))),
            (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3,
                               timm:$src4)>;
  def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2,
                                 (loadv32i8 addr:$src3), (i8 timm:$src4))),
            (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
                               timm:$src4)>;
  def : Pat<(v32i8 (X86vpternlog (loadv32i8 addr:$src3), VR256X:$src2,
                                 VR256X:$src1, (i8 timm:$src4))),
            (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
                               (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(v32i8 (X86vpternlog VR256X:$src1, (loadv32i8 addr:$src3),
                                 VR256X:$src2, (i8 timm:$src4))),
            (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
                               (VPTERNLOG132_imm8 timm:$src4))>;

  def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2, VR256X:$src3,
                                  (i8 timm:$src4))),
            (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3,
                               timm:$src4)>;
  def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2,
                                  (loadv16i16 addr:$src3), (i8 timm:$src4))),
            (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
                               timm:$src4)>;
  def : Pat<(v16i16 (X86vpternlog (loadv16i16 addr:$src3), VR256X:$src2,
                                  VR256X:$src1, (i8 timm:$src4))),
            (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
                               (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(v16i16 (X86vpternlog VR256X:$src1, (loadv16i16 addr:$src3),
                                  VR256X:$src2, (i8 timm:$src4))),
            (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
                               (VPTERNLOG132_imm8 timm:$src4))>;
}

let Predicates = [HasAVX512] in {
  def : Pat<(v64i8 (X86vpternlog VR512:$src1, VR512:$src2, VR512:$src3,
                                 (i8 timm:$src4))),
            (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3,
                            timm:$src4)>;
  def : Pat<(v64i8 (X86vpternlog VR512:$src1, VR512:$src2,
                                 (loadv64i8 addr:$src3), (i8 timm:$src4))),
            (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
                            timm:$src4)>;
  def : Pat<(v64i8 (X86vpternlog (loadv64i8 addr:$src3), VR512:$src2,
                                 VR512:$src1, (i8 timm:$src4))),
            (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
                            (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(v64i8 (X86vpternlog VR512:$src1, (loadv64i8 addr:$src3),
                                 VR512:$src2, (i8 timm:$src4))),
            (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
                            (VPTERNLOG132_imm8 timm:$src4))>;

  def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2, VR512:$src3,
                                  (i8 timm:$src4))),
            (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3,
                            timm:$src4)>;
  def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2,
                                  (loadv32i16 addr:$src3), (i8 timm:$src4))),
            (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
                            timm:$src4)>;
  def : Pat<(v32i16 (X86vpternlog (loadv32i16 addr:$src3), VR512:$src2,
                                  VR512:$src1, (i8 timm:$src4))),
            (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
                            (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(v32i16 (X86vpternlog VR512:$src1, (loadv32i16 addr:$src3),
                                  VR512:$src2, (i8 timm:$src4))),
            (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
                            (VPTERNLOG132_imm8 timm:$src4))>;
}

// Patterns to implement vnot using vpternlog instead of creating all ones
// using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen
// so that the result is only dependent on src0. But we use the same source
// for all operands to prevent a false dependency.
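// For example: bit k of the immediate is the result for inputs
// (src0, src1, src2) = (k>>2, (k>>1)&1, k&1), so 15 = 0b00001111 sets
// exactly the bits where src0 is 0, i.e. the result is ~src0 no matter
// what src1 and src2 hold.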
// TODO: We should maybe have a more generalized algorithm for folding to
// vpternlog.
let Predicates = [HasAVX512] in {
  def : Pat<(xor VR512:$src, (v64i8 immAllOnesV)),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
  def : Pat<(xor VR512:$src, (v32i16 immAllOnesV)),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
  def : Pat<(xor VR512:$src, (v16i32 immAllOnesV)),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
  def : Pat<(xor VR512:$src, (v8i64 immAllOnesV)),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
}

let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(xor VR128X:$src, (v16i8 immAllOnesV)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (i8 15)), sub_xmm)>;
  def : Pat<(xor VR128X:$src, (v8i16 immAllOnesV)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (i8 15)), sub_xmm)>;
  def : Pat<(xor VR128X:$src, (v4i32 immAllOnesV)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (i8 15)), sub_xmm)>;
  def : Pat<(xor VR128X:$src, (v2i64 immAllOnesV)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (i8 15)), sub_xmm)>;

  def : Pat<(xor VR256X:$src, (v32i8 immAllOnesV)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (i8 15)), sub_ymm)>;
  def : Pat<(xor VR256X:$src, (v16i16 immAllOnesV)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (i8 15)), sub_ymm)>;
  def : Pat<(xor VR256X:$src, (v8i32 immAllOnesV)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (i8 15)), sub_ymm)>;
  def : Pat<(xor VR256X:$src, (v4i64 immAllOnesV)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (i8 15)), sub_ymm)>;
}

let Predicates = [HasVLX] in {
  def : Pat<(xor VR128X:$src, (v16i8 immAllOnesV)),
            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
  def : Pat<(xor VR128X:$src, (v8i16 immAllOnesV)),
            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
  def : Pat<(xor VR128X:$src, (v4i32 immAllOnesV)),
            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
  def : Pat<(xor VR128X:$src, (v2i64 immAllOnesV)),
            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;

  def : Pat<(xor VR256X:$src, (v32i8 immAllOnesV)),
            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
  def : Pat<(xor VR256X:$src, (v16i16 immAllOnesV)),
            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
  def : Pat<(xor VR256X:$src, (v8i32 immAllOnesV)),
            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
  def : Pat<(xor VR256X:$src, (v4i64 immAllOnesV)),
            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - FixupImm
//===----------------------------------------------------------------------===//

multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                  X86VectorVTInfo TblVT>{
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
    defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                    (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                    OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                    (X86VFixupimm (_.VT _.RC:$src1),
                                  (_.VT _.RC:$src2),
                                  (TblVT.VT _.RC:$src3),
                                  (i32 timm:$src4))>, Sched<[sched]>;
    defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
                    OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                    (X86VFixupimm (_.VT _.RC:$src1),
                                  (_.VT _.RC:$src2),
                                  (TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))),
                                  (i32 timm:$src4))>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
    defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
                    OpcodeStr##_.Suffix, "$src4, ${src3}"##_.BroadcastStr##", $src2",
                    "$src2, ${src3}"##_.BroadcastStr##", $src4",
                    (X86VFixupimm (_.VT _.RC:$src1),
                                  (_.VT _.RC:$src2),
                                  (TblVT.VT (TblVT.BroadcastLdFrag addr:$src3)),
                                  (i32 timm:$src4))>,
                    EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  } // Constraints = "$src1 = $dst"
}

multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
                                      X86FoldableSchedWrite sched,
                                      X86VectorVTInfo _, X86VectorVTInfo TblVT>
  : avx512_fixupimm_packed<opc, OpcodeStr, sched, _, TblVT> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
    defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                    (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                    OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
                    "$src2, $src3, {sae}, $src4",
                    (X86VFixupimmSAE (_.VT _.RC:$src1),
                                     (_.VT _.RC:$src2),
                                     (TblVT.VT _.RC:$src3),
                                     (i32 timm:$src4))>,
                    EVEX_B, Sched<[sched]>;
  }
}

multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                  X86VectorVTInfo _src3VT> {
  let Constraints = "$src1 = $dst", Predicates = [HasAVX512],
      ExeDomain = _.ExeDomain in {
    defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                    (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                    OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                    (X86VFixupimms (_.VT _.RC:$src1),
                                   (_.VT _.RC:$src2),
                                   (_src3VT.VT _src3VT.RC:$src3),
                                   (i32 timm:$src4))>, Sched<[sched]>;
    defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                    (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                    OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
                    "$src2, $src3, {sae}, $src4",
                    (X86VFixupimmSAEs (_.VT _.RC:$src1),
                                      (_.VT _.RC:$src2),
                                      (_src3VT.VT _src3VT.RC:$src3),
                                      (i32 timm:$src4))>,
                    EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
    defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
                    OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                    (X86VFixupimms (_.VT _.RC:$src1),
                                   (_.VT _.RC:$src2),
                                   (_src3VT.VT (scalar_to_vector
                                                (_src3VT.ScalarLdFrag addr:$src3))),
                                   (i32 timm:$src4))>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched,
                                      AVX512VLVectorVTInfo _Vec,
                                      AVX512VLVectorVTInfo _Tbl> {
  let Predicates = [HasAVX512] in
    defm Z    : avx512_fixupimm_packed_sae<0x54, "vfixupimm", sched.ZMM,
                                _Vec.info512, _Tbl.info512>, AVX512AIi8Base,
                                EVEX_4V, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.XMM,
                                _Vec.info128, _Tbl.info128>, AVX512AIi8Base,
                                EVEX_4V, EVEX_V128;
    defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.YMM,
                                _Vec.info256, _Tbl.info256>, AVX512AIi8Base,
                                EVEX_4V, EVEX_V256;
  }
}

defm VFIXUPIMMSSZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
                         SchedWriteFAdd.Scl, f32x_info, v4i32x_info>,
                         AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
defm VFIXUPIMMSDZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
                         SchedWriteFAdd.Scl, f64x_info, v2i64x_info>,
                         AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f32_info,
                         avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f64_info,
                         avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;

// Patterns used to select SSE scalar fp arithmetic instructions from
// either:
//
// (1) a scalar fp operation followed by a blend
//
// The effect is that the backend no longer emits unnecessary vector
// insert instructions immediately after SSE scalar fp instructions
// like addss or mulss.
//
// For example, given the following code:
//   __m128 foo(__m128 A, __m128 B) {
//     A[0] += B[0];
//     return A;
//   }
//
// Previously we generated:
//   addss %xmm0, %xmm1
//   movss %xmm1, %xmm0
//
// We now generate:
//   addss %xmm1, %xmm0
//
// (2) a vector packed single/double fp operation followed by a vector insert
//
// The effect is that the backend converts the packed fp instruction
// followed by a vector insert into a single SSE scalar fp instruction.
//
// For example, given the following code:
//   __m128 foo(__m128 A, __m128 B) {
//     __m128 C = A + B;
//     return (__m128) {C[0], A[1], A[2], A[3]};
//   }
//
// Previously we generated:
//   addps %xmm0, %xmm1
//   movss %xmm1, %xmm0
//
// We now generate:
//   addss %xmm1, %xmm0

// TODO: Some canonicalization in lowering would simplify the number of
// patterns we have to try to match.
multiclass AVX512_scalar_math_fp_patterns<SDNode Op, string OpcPrefix, SDNode MoveNode,
                                          X86VectorVTInfo _, PatLeaf ZeroFP> {
  let Predicates = [HasAVX512] in {
    // extracted scalar math op with insert via movss
    def : Pat<(MoveNode
               (_.VT VR128X:$dst),
               (_.VT (scalar_to_vector
                      (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
                          _.FRC:$src)))),
              (!cast<Instruction>("V"#OpcPrefix#Zrr_Int) _.VT:$dst,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src, VR128X)))>;
    def : Pat<(MoveNode
               (_.VT VR128X:$dst),
               (_.VT (scalar_to_vector
                      (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
                          (_.ScalarLdFrag addr:$src))))),
              (!cast<Instruction>("V"#OpcPrefix#Zrm_Int) _.VT:$dst, addr:$src)>;

    // extracted masked scalar math op with insert via movss
    def : Pat<(MoveNode (_.VT VR128X:$src1),
               (scalar_to_vector
                (X86selects VK1WM:$mask,
                            (Op (_.EltVT
                                 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                _.FRC:$src2),
                            _.FRC:$src0))),
              (!cast<Instruction>("V"#OpcPrefix#Zrr_Intk)
               (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
               VK1WM:$mask, _.VT:$src1,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
    def : Pat<(MoveNode (_.VT VR128X:$src1),
               (scalar_to_vector
                (X86selects VK1WM:$mask,
                            (Op (_.EltVT
                                 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                (_.ScalarLdFrag addr:$src2)),
                            _.FRC:$src0))),
              (!cast<Instruction>("V"#OpcPrefix#Zrm_Intk)
               (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
               VK1WM:$mask, _.VT:$src1, addr:$src2)>;

    // extracted masked scalar math op with insert via movss
    def : Pat<(MoveNode (_.VT VR128X:$src1),
               (scalar_to_vector
                (X86selects VK1WM:$mask,
                            (Op (_.EltVT
                                 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                _.FRC:$src2), (_.EltVT ZeroFP)))),
              (!cast<I>("V"#OpcPrefix#Zrr_Intkz)
               VK1WM:$mask, _.VT:$src1,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
    def : Pat<(MoveNode (_.VT VR128X:$src1),
               (scalar_to_vector
                (X86selects VK1WM:$mask,
                            (Op (_.EltVT
                                 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                (_.ScalarLdFrag addr:$src2)), (_.EltVT ZeroFP)))),
              (!cast<I>("V"#OpcPrefix#Zrm_Intkz) VK1WM:$mask, _.VT:$src1, addr:$src2)>;
  }
}

defm : AVX512_scalar_math_fp_patterns<fadd, "ADDSS", X86Movss, v4f32x_info, fp32imm0>;
defm : AVX512_scalar_math_fp_patterns<fsub, "SUBSS", X86Movss, v4f32x_info, fp32imm0>;
defm : AVX512_scalar_math_fp_patterns<fmul, "MULSS", X86Movss, v4f32x_info, fp32imm0>;
defm : AVX512_scalar_math_fp_patterns<fdiv, "DIVSS", X86Movss, v4f32x_info, fp32imm0>;

defm : AVX512_scalar_math_fp_patterns<fadd, "ADDSD", X86Movsd, v2f64x_info, fp64imm0>;
defm : AVX512_scalar_math_fp_patterns<fsub, "SUBSD", X86Movsd, v2f64x_info, fp64imm0>;
defm : AVX512_scalar_math_fp_patterns<fmul, "MULSD", X86Movsd, v2f64x_info, fp64imm0>;
defm : AVX512_scalar_math_fp_patterns<fdiv, "DIVSD", X86Movsd, v2f64x_info, fp64imm0>;

multiclass AVX512_scalar_unary_math_patterns<SDNode OpNode, string OpcPrefix,
                                             SDNode Move, X86VectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    def : Pat<(_.VT (Move _.VT:$dst,
                     (scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))),
              (!cast<Instruction>("V"#OpcPrefix#Zr_Int) _.VT:$dst, _.VT:$src)>;
  }
}

defm : AVX512_scalar_unary_math_patterns<fsqrt, "SQRTSS", X86Movss, v4f32x_info>;
defm : AVX512_scalar_unary_math_patterns<fsqrt, "SQRTSD", X86Movsd, v2f64x_info>;

//===----------------------------------------------------------------------===//
// AES instructions
//===----------------------------------------------------------------------===//

multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> {
  let Predicates = [HasVLX, HasVAES] in {
    defm Z128 : AESI_binop_rm_int<Op, OpStr,
                                  !cast<Intrinsic>(IntPrefix),
                                  loadv2i64, 0, VR128X, i128mem>,
                EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V128, VEX_WIG;
    defm Z256 : AESI_binop_rm_int<Op, OpStr,
                                  !cast<Intrinsic>(IntPrefix##"_256"),
                                  loadv4i64, 0, VR256X, i256mem>,
                EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512, HasVAES] in
    defm Z : AESI_binop_rm_int<Op, OpStr,
                               !cast<Intrinsic>(IntPrefix##"_512"),
                               loadv8i64, 0, VR512, i512mem>,
             EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_WIG;
}

defm VAESENC : avx512_vaes<0xDC, "vaesenc", "int_x86_aesni_aesenc">;
defm VAESENCLAST : avx512_vaes<0xDD, "vaesenclast", "int_x86_aesni_aesenclast">;
defm VAESDEC : avx512_vaes<0xDE, "vaesdec", "int_x86_aesni_aesdec">;
defm VAESDECLAST : avx512_vaes<0xDF, "vaesdeclast", "int_x86_aesni_aesdeclast">;

//===----------------------------------------------------------------------===//
// PCLMUL instructions - Carry less multiplication
//===----------------------------------------------------------------------===//

let Predicates = [HasAVX512, HasVPCLMULQDQ] in
defm VPCLMULQDQZ : vpclmulqdq<VR512, i512mem, loadv8i64, int_x86_pclmulqdq_512>,
                   EVEX_4V, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_WIG;

let Predicates = [HasVLX, HasVPCLMULQDQ] in {
defm VPCLMULQDQZ128 : vpclmulqdq<VR128X, i128mem, loadv2i64, int_x86_pclmulqdq>,
                      EVEX_4V, EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_WIG;

defm VPCLMULQDQZ256: vpclmulqdq<VR256X, i256mem, loadv4i64,
                                int_x86_pclmulqdq_256>, EVEX_4V, EVEX_V256,
                                EVEX_CD8<64, CD8VF>, VEX_WIG;
}

// Aliases
defm : vpclmulqdq_aliases<"VPCLMULQDQZ", VR512, i512mem>;
defm : vpclmulqdq_aliases<"VPCLMULQDQZ128", VR128X, i128mem>;
defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>;

//===----------------------------------------------------------------------===//
// VBMI2
//===----------------------------------------------------------------------===//

multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
  let Constraints = "$src1 = $dst",
      ExeDomain = VTI.ExeDomain in {
    defm r: AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
                (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
                "$src3, $src2", "$src2, $src3",
                (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>,
                AVX512FMA3Base, Sched<[sched]>;
    defm m: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),

//===----------------------------------------------------------------------===//
// PCLMUL instructions - Carry less multiplication
//===----------------------------------------------------------------------===//

let Predicates = [HasAVX512, HasVPCLMULQDQ] in
defm VPCLMULQDQZ : vpclmulqdq<VR512, i512mem, loadv8i64, int_x86_pclmulqdq_512>,
                   EVEX_4V, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_WIG;

let Predicates = [HasVLX, HasVPCLMULQDQ] in {
defm VPCLMULQDQZ128 : vpclmulqdq<VR128X, i128mem, loadv2i64, int_x86_pclmulqdq>,
                      EVEX_4V, EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_WIG;

defm VPCLMULQDQZ256 : vpclmulqdq<VR256X, i256mem, loadv4i64,
                                 int_x86_pclmulqdq_256>, EVEX_4V, EVEX_V256,
                      EVEX_CD8<64, CD8VF>, VEX_WIG;
}

// Aliases
defm : vpclmulqdq_aliases<"VPCLMULQDQZ", VR512, i512mem>;
defm : vpclmulqdq_aliases<"VPCLMULQDQZ128", VR128X, i128mem>;
defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>;

//===----------------------------------------------------------------------===//
// VBMI2
//===----------------------------------------------------------------------===//

multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
  let Constraints = "$src1 = $dst",
      ExeDomain = VTI.ExeDomain in {
    defm r : AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
                                  (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
                                  "$src3, $src2", "$src2, $src3",
                                  (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>,
             AVX512FMA3Base, Sched<[sched]>;
    defm m : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                                  (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
                                  "$src3, $src2", "$src2, $src3",
                                  (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
                                           (VTI.VT (VTI.LdFrag addr:$src3))))>,
             AVX512FMA3Base,
             Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
                               X86FoldableSchedWrite sched, X86VectorVTInfo VTI>
  : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched, VTI> {
  let Constraints = "$src1 = $dst",
      ExeDomain = VTI.ExeDomain in
  defm mb : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                                 (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr,
                                 "${src3}"##VTI.BroadcastStr##", $src2",
                                 "$src2, ${src3}"##VTI.BroadcastStr,
                                 (OpNode VTI.RC:$src1, VTI.RC:$src2,
                                  (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
            AVX512FMA3Base, EVEX_B,
            Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode,
                                     X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasVBMI2] in
  defm Z : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
           EVEX_V512;
  let Predicates = [HasVBMI2, HasVLX] in {
    defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
                EVEX_V256;
    defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
                EVEX_V128;
  }
}

multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode,
                                      X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasVBMI2] in
  defm Z : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
           EVEX_V512;
  let Predicates = [HasVBMI2, HasVLX] in {
    defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
                EVEX_V256;
    defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
                EVEX_V128;
  }
}

multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix,
                           SDNode OpNode, X86SchedWriteWidths sched> {
  defm W : VBMI2_shift_var_rm_common<wOp, Prefix##"w", OpNode, sched,
                                     avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>;
  defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix##"d", OpNode, sched,
                                      avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
  defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix##"q", OpNode, sched,
                                      avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
}

multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix,
                           SDNode OpNode, X86SchedWriteWidths sched> {
  defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix##"w", sched,
                                     avx512vl_i16_info, avx512vl_i16_info, HasVBMI2>,
           VEX_W, EVEX_CD8<16, CD8VF>;
  defm D : avx512_common_3Op_imm8<Prefix##"d", avx512vl_i32_info, dqOp,
           OpNode, sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
  defm Q : avx512_common_3Op_imm8<Prefix##"q", avx512vl_i64_info, dqOp, OpNode,
           sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
}

// Concat & Shift
defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SchedWriteVecIMul>;
defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SchedWriteVecIMul>;
defm VPSHLD  : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SchedWriteVecIMul>;
defm VPSHRD  : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SchedWriteVecIMul>;
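
// Illustration only: each "concat & shift" op forms a double-width value from
// a pair of same-width elements and shifts it, keeping the high half (shld)
// or the low half (shrd). A minimal C sketch, assuming the <immintrin.h>
// VBMI2 names:
//   __m512i r1 = _mm512_shldi_epi16(a, b, 3); // immediate shift count
//   __m512i r2 = _mm512_shldv_epi16(a, b, c); // per-element variable count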

// Compress
defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", WriteVarShuffle256,
                                         avx512vl_i8_info, HasVBMI2>, EVEX,
                                         NotMemoryFoldable;
defm VPCOMPRESSW : compress_by_elt_width<0x63, "vpcompressw", WriteVarShuffle256,
                                         avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W,
                                         NotMemoryFoldable;
// Expand
defm VPEXPANDB : expand_by_elt_width<0x62, "vpexpandb", WriteVarShuffle256,
                                     avx512vl_i8_info, HasVBMI2>, EVEX;
defm VPEXPANDW : expand_by_elt_width<0x62, "vpexpandw", WriteVarShuffle256,
                                     avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W;

//===----------------------------------------------------------------------===//
// VNNI
//===----------------------------------------------------------------------===//

let Constraints = "$src1 = $dst" in
multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
                    X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
                    bit IsCommutable> {
  defm r  : AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
                                 (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
                                 "$src3, $src2", "$src2, $src3",
                                 (VTI.VT (OpNode VTI.RC:$src1,
                                          VTI.RC:$src2, VTI.RC:$src3)),
                                 IsCommutable, IsCommutable>,
            EVEX_4V, T8PD, Sched<[sched]>;
  defm m  : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                                 (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
                                 "$src3, $src2", "$src2, $src3",
                                 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
                                          (VTI.VT (VTI.LdFrag addr:$src3))))>,
            EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD,
            Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm mb : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                                 (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
                                 OpStr, "${src3}"##VTI.BroadcastStr##", $src2",
                                 "$src2, ${src3}"##VTI.BroadcastStr,
                                 (OpNode VTI.RC:$src1, VTI.RC:$src2,
                                  (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
            EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B,
            T8PD, Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode,
                       X86SchedWriteWidths sched, bit IsCommutable> {
  let Predicates = [HasVNNI] in
  defm Z : VNNI_rmb<Op, OpStr, OpNode, sched.ZMM, v16i32_info,
                    IsCommutable>, EVEX_V512;
  let Predicates = [HasVNNI, HasVLX] in {
    defm Z256 : VNNI_rmb<Op, OpStr, OpNode, sched.YMM, v8i32x_info,
                         IsCommutable>, EVEX_V256;
    defm Z128 : VNNI_rmb<Op, OpStr, OpNode, sched.XMM, v4i32x_info,
                         IsCommutable>, EVEX_V128;
  }
}

// FIXME: Is there a better scheduler class for VPDP?
defm VPDPBUSD  : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SchedWriteVecIMul, 0>;
defm VPDPBUSDS : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SchedWriteVecIMul, 0>;
defm VPDPWSSD  : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SchedWriteVecIMul, 1>;
defm VPDPWSSDS : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SchedWriteVecIMul, 1>;

def X86vpmaddwd_su : PatFrag<(ops node:$lhs, node:$rhs),
                             (X86vpmaddwd node:$lhs, node:$rhs), [{
  return N->hasOneUse();
}]>;
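
// Illustration only: the patterns below recognize the pre-VNNI
// multiply-accumulate idiom and fuse it into VPDPWSSD. In C intrinsics
// (assuming the usual <immintrin.h> names) the idiom looks like:
//   __m512i acc2 = _mm512_add_epi32(acc, _mm512_madd_epi16(a, b));
// which, with VNNI available, can be selected as a single vpdpwssd.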

// Patterns to match VPDPWSSD from existing instructions/intrinsics.
let Predicates = [HasVNNI] in {
  def : Pat<(v16i32 (add VR512:$src1,
                         (X86vpmaddwd_su VR512:$src2, VR512:$src3))),
            (VPDPWSSDZr VR512:$src1, VR512:$src2, VR512:$src3)>;
  def : Pat<(v16i32 (add VR512:$src1,
                         (X86vpmaddwd_su VR512:$src2, (load addr:$src3)))),
            (VPDPWSSDZm VR512:$src1, VR512:$src2, addr:$src3)>;
}
let Predicates = [HasVNNI, HasVLX] in {
  def : Pat<(v8i32 (add VR256X:$src1,
                        (X86vpmaddwd_su VR256X:$src2, VR256X:$src3))),
            (VPDPWSSDZ256r VR256X:$src1, VR256X:$src2, VR256X:$src3)>;
  def : Pat<(v8i32 (add VR256X:$src1,
                        (X86vpmaddwd_su VR256X:$src2, (load addr:$src3)))),
            (VPDPWSSDZ256m VR256X:$src1, VR256X:$src2, addr:$src3)>;
  def : Pat<(v4i32 (add VR128X:$src1,
                        (X86vpmaddwd_su VR128X:$src2, VR128X:$src3))),
            (VPDPWSSDZ128r VR128X:$src1, VR128X:$src2, VR128X:$src3)>;
  def : Pat<(v4i32 (add VR128X:$src1,
                        (X86vpmaddwd_su VR128X:$src2, (load addr:$src3)))),
            (VPDPWSSDZ128m VR128X:$src1, VR128X:$src2, addr:$src3)>;
}

//===----------------------------------------------------------------------===//
// Bit Algorithms
//===----------------------------------------------------------------------===//

// FIXME: Is there a better scheduler class for VPOPCNTB/VPOPCNTW?
defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, SchedWriteVecALU,
                                   avx512vl_i8_info, HasBITALG>;
defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SchedWriteVecALU,
                                   avx512vl_i16_info, HasBITALG>, VEX_W;

defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>;
defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>;

def X86Vpshufbitqmb_su : PatFrag<(ops node:$src1, node:$src2),
                                 (X86Vpshufbitqmb node:$src1, node:$src2), [{
  return N->hasOneUse();
}]>;

multiclass VPSHUFBITQMB_rm<X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
  defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst),
                                (ins VTI.RC:$src1, VTI.RC:$src2),
                                "vpshufbitqmb",
                                "$src2, $src1", "$src1, $src2",
                                (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
                                                 (VTI.VT VTI.RC:$src2)),
                                (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
                                                    (VTI.VT VTI.RC:$src2))>, EVEX_4V, T8PD,
            Sched<[sched]>;
  defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst),
                                (ins VTI.RC:$src1, VTI.MemOp:$src2),
                                "vpshufbitqmb",
                                "$src2, $src1", "$src1, $src2",
                                (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
                                                 (VTI.VT (VTI.LdFrag addr:$src2))),
                                (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
                                                    (VTI.VT (VTI.LdFrag addr:$src2)))>,
            EVEX_4V, EVEX_CD8<8, CD8VF>, T8PD,
            Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass VPSHUFBITQMB_common<X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasBITALG] in
  defm Z : VPSHUFBITQMB_rm<sched.ZMM, VTI.info512>, EVEX_V512;
  let Predicates = [HasBITALG, HasVLX] in {
    defm Z256 : VPSHUFBITQMB_rm<sched.YMM, VTI.info256>, EVEX_V256;
    defm Z128 : VPSHUFBITQMB_rm<sched.XMM, VTI.info128>, EVEX_V128;
  }
}
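
// Illustration only: vpshufbitqmb produces one mask bit per byte by selecting,
// within each 64-bit lane of src1, the bit indexed by the low six bits of the
// corresponding byte of src2. A minimal C sketch, assuming the <immintrin.h>
// BITALG names:
//   __mmask64 m = _mm512_bitshuffle_epi64_mask(b, c);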

// FIXME: Is there a better scheduler class for VPSHUFBITQMB?
defm VPSHUFBITQMB : VPSHUFBITQMB_common<SchedWriteVecIMul, avx512vl_i8_info>;

//===----------------------------------------------------------------------===//
// GFNI
//===----------------------------------------------------------------------===//

multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
                                   X86SchedWriteWidths sched> {
  let Predicates = [HasGFNI, HasAVX512, HasBWI] in
  defm Z : avx512_binop_rm<Op, OpStr, OpNode, v64i8_info, sched.ZMM, 1>,
           EVEX_V512;
  let Predicates = [HasGFNI, HasVLX, HasBWI] in {
    defm Z256 : avx512_binop_rm<Op, OpStr, OpNode, v32i8x_info, sched.YMM, 1>,
                EVEX_V256;
    defm Z128 : avx512_binop_rm<Op, OpStr, OpNode, v16i8x_info, sched.XMM, 1>,
                EVEX_V128;
  }
}

defm VGF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb,
                                          SchedWriteVecALU>,
                  EVEX_CD8<8, CD8VF>, T8PD;

multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
                                      X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
                                      X86VectorVTInfo BcstVTI>
  : avx512_3Op_rm_imm8<Op, OpStr, OpNode, sched, VTI, VTI> {
  let ExeDomain = VTI.ExeDomain in
  defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                              (ins VTI.RC:$src1, VTI.ScalarMemOp:$src2, u8imm:$src3),
                              OpStr, "$src3, ${src2}"##BcstVTI.BroadcastStr##", $src1",
                              "$src1, ${src2}"##BcstVTI.BroadcastStr##", $src3",
                              (OpNode (VTI.VT VTI.RC:$src1),
                                      (bitconvert (BcstVTI.VT (X86VBroadcastld64 addr:$src2))),
                                      (i8 timm:$src3))>, EVEX_B,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
                                     X86SchedWriteWidths sched> {
  let Predicates = [HasGFNI, HasAVX512, HasBWI] in
  defm Z : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.ZMM,
                                      v64i8_info, v8i64_info>, EVEX_V512;
  let Predicates = [HasGFNI, HasVLX, HasBWI] in {
    defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.YMM,
                                           v32i8x_info, v4i64x_info>, EVEX_V256;
    defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.XMM,
                                           v16i8x_info, v2i64x_info>, EVEX_V128;
  }
}

defm VGF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb",
                                                   X86GF2P8affineinvqb, SchedWriteVecIMul>,
                         EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
defm VGF2P8AFFINEQB    : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb",
                                                   X86GF2P8affineqb, SchedWriteVecIMul>,
                         EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
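
// Illustration only: vgf2p8mulb multiplies bytes in GF(2^8) (reduction
// polynomial x^8 + x^4 + x^3 + x + 1), and the affine forms apply an 8x8
// bit-matrix transform plus a constant to each byte. A minimal C sketch,
// assuming the <immintrin.h> GFNI names:
//   __m512i p = _mm512_gf2p8mul_epi8(a, b);
//   __m512i t = _mm512_gf2p8affine_epi64_epi8(x, A, 0x00 /* constant term */);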

//===----------------------------------------------------------------------===//
// AVX5124FMAPS
//===----------------------------------------------------------------------===//

let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedSingle,
    Constraints = "$src1 = $dst" in {
defm V4FMADDPSrm : AVX512_maskable_3src_in_asm<0x9A, MRMSrcMem, v16f32_info,
                    (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                    "v4fmaddps", "$src3, $src2", "$src2, $src3",
                    []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                    Sched<[SchedWriteFMA.ZMM.Folded]>;

defm V4FNMADDPSrm : AVX512_maskable_3src_in_asm<0xAA, MRMSrcMem, v16f32_info,
                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                     "v4fnmaddps", "$src3, $src2", "$src2, $src3",
                     []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                     Sched<[SchedWriteFMA.ZMM.Folded]>;

defm V4FMADDSSrm : AVX512_maskable_3src_in_asm<0x9B, MRMSrcMem, f32x_info,
                    (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
                    "v4fmaddss", "$src3, $src2", "$src2, $src3",
                    []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
                    Sched<[SchedWriteFMA.Scl.Folded]>;

defm V4FNMADDSSrm : AVX512_maskable_3src_in_asm<0xAB, MRMSrcMem, f32x_info,
                     (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
                     "v4fnmaddss", "$src3, $src2", "$src2, $src3",
                     []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
                     Sched<[SchedWriteFMA.Scl.Folded]>;
}

//===----------------------------------------------------------------------===//
// AVX5124VNNIW
//===----------------------------------------------------------------------===//

let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedInt,
    Constraints = "$src1 = $dst" in {
defm VP4DPWSSDrm : AVX512_maskable_3src_in_asm<0x52, MRMSrcMem, v16i32_info,
                    (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                    "vp4dpwssd", "$src3, $src2", "$src2, $src3",
                    []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                    Sched<[SchedWriteFMA.ZMM.Folded]>;

defm VP4DPWSSDSrm : AVX512_maskable_3src_in_asm<0x53, MRMSrcMem, v16i32_info,
                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                     "vp4dpwssds", "$src3, $src2", "$src2, $src3",
                     []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                     Sched<[SchedWriteFMA.ZMM.Folded]>;
}

let hasSideEffects = 0 in {
  let mayStore = 1 in
  def MASKPAIR16STORE : PseudoI<(outs), (ins anymem:$dst, VK16PAIR:$src), []>;
  let mayLoad = 1 in
  def MASKPAIR16LOAD : PseudoI<(outs VK16PAIR:$dst), (ins anymem:$src), []>;
}

//===----------------------------------------------------------------------===//
// VP2INTERSECT
//===----------------------------------------------------------------------===//

multiclass avx512_vp2intersect_modes<X86VectorVTInfo _> {
  def rr : I<0x68, MRMSrcReg,
             (outs _.KRPC:$dst),
             (ins _.RC:$src1, _.RC:$src2),
             !strconcat("vp2intersect", _.Suffix,
                        "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.KRPC:$dst, (X86vp2intersect
                                 _.RC:$src1, (_.VT _.RC:$src2)))]>,
           EVEX_4V, T8XD;

  def rm : I<0x68, MRMSrcMem,
             (outs _.KRPC:$dst),
             (ins _.RC:$src1, _.MemOp:$src2),
             !strconcat("vp2intersect", _.Suffix,
                        "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.KRPC:$dst, (X86vp2intersect
                                 _.RC:$src1, (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>,
           EVEX_4V, T8XD, EVEX_CD8<_.EltSize, CD8VF>;

  def rmb : I<0x68, MRMSrcMem,
              (outs _.KRPC:$dst),
              (ins _.RC:$src1, _.ScalarMemOp:$src2),
              !strconcat("vp2intersect", _.Suffix, "\t{${src2}", _.BroadcastStr,
                         ", $src1, $dst|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
              [(set _.KRPC:$dst, (X86vp2intersect
                                  _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))]>,
            EVEX_4V, T8XD, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>;
}

multiclass avx512_vp2intersect<AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512, HasVP2INTERSECT] in
  defm Z : avx512_vp2intersect_modes<_.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVP2INTERSECT, HasVLX] in {
    defm Z256 : avx512_vp2intersect_modes<_.info256>, EVEX_V256;
    defm Z128 : avx512_vp2intersect_modes<_.info128>, EVEX_V128;
  }
}
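
// Illustration only: vp2intersect writes a pair of masks marking, for each
// element of one source, whether it equals any element of the other source.
// A minimal C sketch, assuming the <immintrin.h> VP2INTERSECT names:
//   __mmask16 k1, k2;
//   _mm512_2intersect_epi32(a, b, &k1, &k2);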

defm VP2INTERSECTD : avx512_vp2intersect<avx512vl_i32_info>;
defm VP2INTERSECTQ : avx512_vp2intersect<avx512vl_i64_info>, VEX_W;

multiclass avx512_binop_all2<bits<8> opc, string OpcodeStr,
                             X86SchedWriteWidths sched,
                             AVX512VLVectorVTInfo _SrcVTInfo,
                             AVX512VLVectorVTInfo _DstVTInfo,
                             SDNode OpNode, Predicate prd,
                             bit IsCommutable = 0> {
  let Predicates = [prd] in
  defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
                                 _SrcVTInfo.info512, _DstVTInfo.info512,
                                 _SrcVTInfo.info512, IsCommutable>,
                EVEX_V512, EVEX_CD8<32, CD8VF>;
  let Predicates = [HasVLX, prd] in {
    defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
                                      _SrcVTInfo.info256, _DstVTInfo.info256,
                                      _SrcVTInfo.info256, IsCommutable>,
                     EVEX_V256, EVEX_CD8<32, CD8VF>;
    defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
                                      _SrcVTInfo.info128, _DstVTInfo.info128,
                                      _SrcVTInfo.info128, IsCommutable>,
                     EVEX_V128, EVEX_CD8<32, CD8VF>;
  }
}

defm VCVTNE2PS2BF16 : avx512_binop_all2<0x72, "vcvtne2ps2bf16",
                                        SchedWriteCvtPD2PS, // FIXME: Should be SchedWriteCvtPS2BF.
                                        avx512vl_f32_info, avx512vl_i16_info,
                                        X86cvtne2ps2bf16, HasBF16, 0>, T8XD;

// Truncate Float to BFloat16
multiclass avx512_cvtps2bf16<bits<8> opc, string OpcodeStr,
                             X86SchedWriteWidths sched> {
  let Predicates = [HasBF16] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i16x_info, v16f32_info,
                            X86cvtneps2bf16, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasBF16, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v4f32x_info,
                               null_frag, sched.XMM, "{1to4}", "{x}", f128mem,
                               VK4WM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v8f32x_info,
                               X86cvtneps2bf16,
                               sched.YMM, "{1to8}", "{y}">, EVEX_V256;

    def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
                     VR128X:$src), 0>;
    def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst,
                     f128mem:$src), 0, "intel">;
    def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
                     VR256X:$src), 0>;
    def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst,
                     f256mem:$src), 0, "intel">;
  }
}

defm VCVTNEPS2BF16 : avx512_cvtps2bf16<0x72, "vcvtneps2bf16",
                                       SchedWriteCvtPD2PS>, T8XS,
                                       EVEX_CD8<32, CD8VF>;
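
// Illustration only: vcvtneps2bf16 narrows packed f32 to bfloat16 with
// round-to-nearest-even. A minimal C sketch, assuming the avx512bf16
// <immintrin.h> names:
//   __m256bh h = _mm512_cvtneps_pbh(v); // 16 x f32 -> 16 x bf16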

let Predicates = [HasBF16, HasVLX] in {
  // Special patterns to allow use of X86mcvtneps2bf16 for masking. Instruction
  // patterns have been disabled with null_frag.
  def : Pat<(v8i16 (X86cvtneps2bf16 (v4f32 VR128X:$src))),
            (VCVTNEPS2BF16Z128rr VR128X:$src)>;
  def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), (v8i16 VR128X:$src0),
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src)>;
  def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), v8i16x_info.ImmAllZerosV,
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rrkz VK4WM:$mask, VR128X:$src)>;

  def : Pat<(v8i16 (X86cvtneps2bf16 (loadv4f32 addr:$src))),
            (VCVTNEPS2BF16Z128rm addr:$src)>;
  def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), (v8i16 VR128X:$src0),
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), v8i16x_info.ImmAllZerosV,
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rmkz VK4WM:$mask, addr:$src)>;

  def : Pat<(v8i16 (X86cvtneps2bf16 (v4f32
                                     (X86VBroadcastld32 addr:$src)))),
            (VCVTNEPS2BF16Z128rmb addr:$src)>;
  def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
                              (v8i16 VR128X:$src0), VK4WM:$mask),
            (VCVTNEPS2BF16Z128rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
                              v8i16x_info.ImmAllZerosV, VK4WM:$mask),
            (VCVTNEPS2BF16Z128rmbkz VK4WM:$mask, addr:$src)>;
}

let Constraints = "$src1 = $dst" in {
multiclass avx512_dpbf16ps_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86VectorVTInfo _, X86VectorVTInfo src_v> {
  defm r : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                (ins _.RC:$src2, _.RC:$src3),
                                OpcodeStr, "$src3, $src2", "$src2, $src3",
                                (_.VT (OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3))>,
           EVEX_4V;

  defm m : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                                (ins _.RC:$src2, _.MemOp:$src3),
                                OpcodeStr, "$src3, $src2", "$src2, $src3",
                                (_.VT (OpNode _.RC:$src1, _.RC:$src2,
                                       (src_v.VT (bitconvert
                                                  (src_v.LdFrag addr:$src3)))))>, EVEX_4V;

  defm mb : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                                 (ins _.RC:$src2, _.ScalarMemOp:$src3),
                                 OpcodeStr,
                                 !strconcat("${src3}", _.BroadcastStr, ", $src2"),
                                 !strconcat("$src2, ${src3}", _.BroadcastStr),
                                 (_.VT (OpNode _.RC:$src1, _.RC:$src2,
                                        (src_v.VT (src_v.BroadcastLdFrag addr:$src3))))>,
            EVEX_B, EVEX_4V;
}
} // Constraints = "$src1 = $dst"

multiclass avx512_dpbf16ps_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 AVX512VLVectorVTInfo _,
                                 AVX512VLVectorVTInfo src_v, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, _.info512,
                                src_v.info512>, EVEX_V512;
  }
  let Predicates = [HasVLX, prd] in {
    defm Z256 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, _.info256,
                                   src_v.info256>, EVEX_V256;
    defm Z128 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, _.info128,
                                   src_v.info128>, EVEX_V128;
  }
}

defm VDPBF16PS : avx512_dpbf16ps_sizes<0x52, "vdpbf16ps", X86dpbf16ps,
                                       avx512vl_f32_info, avx512vl_i32_info,
                                       HasBF16>, T8XS, EVEX_CD8<32, CD8VF>;
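
// Illustration only: vdpbf16ps multiplies adjacent pairs of bf16 elements and
// accumulates the f32 products into the corresponding element of the f32
// destination. A minimal C sketch, assuming the avx512bf16 <immintrin.h>
// names:
//   __m512 acc2 = _mm512_dpbf16_ps(acc, a, b); // acc[i] += a[2i]*b[2i] + a[2i+1]*b[2i+1]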