//===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 AVX512 instruction set, defining the
// instructions, and properties of the instructions which are needed for code
// generation, machine code emission, and analysis.
//
//===----------------------------------------------------------------------===//

// Group template arguments that can be derived from the vector type (EltNum x
// EltVT).  These are things like the register class for the writemask, etc.
// The idea is to pass one of these as the template argument rather than the
// individual arguments.
// The template is also used for scalar types, in this case numelts is 1.
//
// Template arguments:
//   numelts - number of elements (1 for the scalar variants).
//   eltvt   - element value type.
//   rc      - register class holding the value.
//   suffix  - mnemonic suffix, empty by default.
class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
                      string suffix = ""> {
  // Plain copies of the template arguments.
  RegisterClass RC = rc;
  ValueType EltVT = eltvt;
  int NumElts = numelts;

  // Corresponding mask register class.
  RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);

  // Corresponding mask register pair class.
  // Left unset (?) when NumElts is greater than 16.
  RegisterOperand KRPC = !if (!gt(NumElts, 16), ?,
                              !cast<RegisterOperand>("VK" # NumElts # "Pair"));

  // Corresponding write-mask register class.
  RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");

  // The mask VT.
  ValueType KVT = !cast<ValueType>("v" # NumElts # "i1");

  // Suffix used in the instruction mnemonic.
  string Suffix = suffix;

  // VTName is a string name for vector VT. For vector types it will be
  // v # NumElts # EltVT, so for vector of 8 elements of i32 it will be v8i32
  // It is a little bit complex for scalar types, where NumElts = 1.
  // In this case we build v4f32 or v2f64
  // (a 32-bit element gives 4 elements, a 64-bit element gives 2; any other
  // element size keeps NumElts unchanged).
  string VTName = "v" # !if (!eq (NumElts, 1),
                        !if (!eq (EltVT.Size, 32), 4,
                        !if (!eq (EltVT.Size, 64), 2, NumElts)), NumElts) # EltVT;

  // The vector VT.
  ValueType VT = !cast<ValueType>(VTName);

  // Element type as a string.
  string EltTypeName = !cast<string>(EltVT);
  // Size of the element type in bits, e.g. 32 for v16i32.
  // The string form is EltTypeName with every "i" and "f" stripped out.
  string EltSizeName = !subst("i", "", !subst("f", "", EltTypeName));
  int EltSize = EltVT.Size;

  // "i" for integer types and "f" for floating-point types
  string TypeVariantName = !subst(EltSizeName, "", EltTypeName);

  // Size of RC in bits, e.g. 512 for VR512.
  // Taken from VT.Size, i.e. the size of the vector type built above.
  int Size = VT.Size;

  // The corresponding memory operand, e.g. i512mem for VR512.
  X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
  // Memory operand named after the element type (EltVT # "mem").
  X86MemOperand ScalarMemOp = !cast<X86MemOperand>(EltVT # "mem");
  // FP scalar memory operand for intrinsics - ssmem/sdmem.
  // Left unset (?) for element types other than f32/f64.
  Operand IntScalarMemOp = !if (!eq (EltTypeName, "f32"), !cast<Operand>("ssmem"),
                           !if (!eq (EltTypeName, "f64"), !cast<Operand>("sdmem"), ?));

  // Load patterns
  // Looked up by name: "load" / "alignedload" followed by VTName, and "load"
  // followed by the element type for the scalar fragment.
  PatFrag LdFrag = !cast<PatFrag>("load" # VTName);

  PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" # VTName);

  PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);

  // sse_load_f32 / sse_load_f64 for f32 / f64 elements, unset (?) otherwise.
  ComplexPattern ScalarIntMemCPat = !if (!eq (EltTypeName, "f32"),
                                          !cast<ComplexPattern>("sse_load_f32"),
                                    !if (!eq (EltTypeName, "f64"),
                                          !cast<ComplexPattern>("sse_load_f64"),
                                    ?));

  // The string to specify embedded broadcast in assembly.
  string BroadcastStr = "{1to" # NumElts # "}";

  // 8-bit compressed displacement tuple/subvector format.  This is only
  // defined for NumElts <= 8.
  // The guard below is (NumElts >> 4) == 0, i.e. NumElts < 16; otherwise the
  // field is left unset (?).
  CD8VForm CD8TupleForm = !if (!eq (!srl(NumElts, 4), 0),
                               !cast<CD8VForm>("CD8VT" # NumElts), ?);

  // sub_xmm for a 128-bit Size, sub_ymm for a 256-bit Size, unset (?) for any
  // other size.
  SubRegIndex SubRegIdx = !if (!eq (Size, 128), sub_xmm,
                          !if (!eq (Size, 256), sub_ymm, ?));

  // Execution domain chosen from the element type; everything that is not
  // f32/f64 gets SSEPackedInt.
  Domain ExeDomain = !if (!eq (EltTypeName, "f32"), SSEPackedSingle,
                     !if (!eq (EltTypeName, "f64"), SSEPackedDouble,
                     SSEPackedInt));

  // FR32X for f32 elements, FR64X for every other element type.
  RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X, FR64X);

  // All-zeros vector of type VT, used by the zero-masking patterns.
  dag ImmAllZerosV = (VT immAllZerosV);

  // "Z128" / "Z256" for 128-bit / 256-bit Size, "Z" otherwise.
  string ZSuffix = !if (!eq (Size, 128), "Z128",
                   !if (!eq (Size, 256), "Z256", "Z"));
}

// 512-bit vector types (RC = VR512).
def v64i8_info  : X86VectorVTInfo<64,  i8, VR512, "b">;
def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">;
def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
def v8i64_info  : X86VectorVTInfo<8,  i64, VR512, "q">;
def v16f32_info : X86VectorVTInfo<16, f32, VR512, "ps">;
def v8f64_info  : X86VectorVTInfo<8,  f64, VR512, "pd">;

// "x" in v32i8x_info means RC = VR256X
def v32i8x_info  : X86VectorVTInfo<32,  i8, VR256X, "b">;
def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
def v8i32x_info  : X86VectorVTInfo<8,  i32, VR256X, "d">;
def v4i64x_info  : X86VectorVTInfo<4,  i64, VR256X, "q">;
def v8f32x_info  : X86VectorVTInfo<8,  f32, VR256X, "ps">;
def v4f64x_info  : X86VectorVTInfo<4,  f64, VR256X, "pd">;

// 128-bit vector types (RC = VR128X).
def v16i8x_info  : X86VectorVTInfo<16,  i8, VR128X, "b">;
def v8i16x_info  : X86VectorVTInfo<8,  i16, VR128X, "w">;
def v4i32x_info  : X86VectorVTInfo<4,  i32, VR128X, "d">;
def v2i64x_info  : X86VectorVTInfo<2,  i64, VR128X, "q">;
def v4f32x_info  : X86VectorVTInfo<4,  f32, VR128X, "ps">;
def v2f64x_info  : X86VectorVTInfo<2,  f64, VR128X, "pd">;

// We map scalar types to the smallest (128-bit) vector type
// with the appropriate element type. This allows to use the same masking logic.
// Scalar variants: NumElts is 1, so X86VectorVTInfo derives a 128-bit vector
// type (v4i32/v2i64/v4f32/v2f64) from the element size.
def i32x_info    : X86VectorVTInfo<1,  i32, GR32, "si">;
def i64x_info    : X86VectorVTInfo<1,  i64, GR64, "sq">;
def f32x_info    : X86VectorVTInfo<1,  f32, VR128X, "ss">;
def f64x_info    : X86VectorVTInfo<1,  f64, VR128X, "sd">;

// Bundles the 512-, 256- and 128-bit X86VectorVTInfo records that share one
// element type, so a multiclass can take all three widths as one argument.
class AVX512VLVectorVTInfo<X86VectorVTInfo i512, X86VectorVTInfo i256,
                           X86VectorVTInfo i128> {
  X86VectorVTInfo info512 = i512;
  X86VectorVTInfo info256 = i256;
  X86VectorVTInfo info128 = i128;
}

def avx512vl_i8_info  : AVX512VLVectorVTInfo<v64i8_info, v32i8x_info,
                                             v16i8x_info>;
def avx512vl_i16_info : AVX512VLVectorVTInfo<v32i16_info, v16i16x_info,
                                             v8i16x_info>;
def avx512vl_i32_info : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info,
                                             v4i32x_info>;
def avx512vl_i64_info : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info,
                                             v2i64x_info>;
def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info,
                                             v4f32x_info>;
def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info,
                                             v2f64x_info>;

// Type information for a mask (vNi1) value: its register class, its
// write-mask register class and its value type.
class X86KVectorVTInfo<RegisterClass _krc, RegisterClass _krcwm,
                       ValueType _vt> {
  RegisterClass KRC = _krc;
  RegisterClass KRCWM = _krcwm;
  ValueType KVT = _vt;
}

def v1i1_info : X86KVectorVTInfo<VK1, VK1WM, v1i1>;
def v2i1_info : X86KVectorVTInfo<VK2, VK2WM, v2i1>;
def v4i1_info : X86KVectorVTInfo<VK4, VK4WM, v4i1>;
def v8i1_info : X86KVectorVTInfo<VK8, VK8WM, v8i1>;
def v16i1_info : X86KVectorVTInfo<VK16, VK16WM, v16i1>;
def v32i1_info : X86KVectorVTInfo<VK32, VK32WM, v32i1>;
def v64i1_info : X86KVectorVTInfo<VK64, VK64WM, v64i1>;

// This multiclass generates the masking variants from the non-masking
// variant.  It only provides the assembly pieces for the masking variants.
// It assumes custom ISel patterns for masking which can be provided as
// template arguments.
// Defines three instructions from one description:
//   NAME    - unmasked form, using Ins and Pattern.
//   NAME#k  - merge-masked form, using MaskingIns and MaskingPattern; the asm
//             string gains "{${mask}}" after $dst.
//   NAME#kz - zero-masked form, using ZeroMaskingIns and ZeroMaskingPattern;
//             the asm string gains "{${mask}} {z}" after $dst.
// Each form has its own commutability flag; IsKZCommutable defaults to
// IsCommutable.
multiclass AVX512_maskable_custom<bits<8> O, Format F,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern,
                                  list<dag> MaskingPattern,
                                  list<dag> ZeroMaskingPattern,
                                  string MaskingConstraint = "",
                                  bit IsCommutable = 0,
                                  bit IsKCommutable = 0,
                                  bit IsKZCommutable = IsCommutable> {
  let isCommutable = IsCommutable in
    def NAME: AVX512<O, F, Outs, Ins,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
                                     "$dst, "#IntelSrcAsm#"}",
                       Pattern>;

  // Prefer over VMOV*rrk Pat<>
  let isCommutable = IsKCommutable in
    def NAME#k: AVX512<O, F, Outs, MaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
                                     "$dst {${mask}}, "#IntelSrcAsm#"}",
                       MaskingPattern>,
              EVEX_K {
      // In case of the 3src subclass this is overridden with a let.
      string Constraints = MaskingConstraint;
    }

  // Zero mask does not add any restrictions to commute operands transformation.
  // So, it is Ok to use IsCommutable instead of IsKCommutable.
  let isCommutable = IsKZCommutable in // Prefer over VMOV*rrkz Pat<>
    def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
                                     "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
                       ZeroMaskingPattern>,
              EVEX_KZ;
}


// Common base class of AVX512_maskable and AVX512_maskable_3src.
// Builds the three pattern lists for AVX512_maskable_custom from dags:
//   unmasked    - (set _.RC:$dst, RHS)
//   masked      - (set _.RC:$dst, MaskingRHS), supplied whole by the caller
//   zero-masked - (set _.RC:$dst, (Select mask, RHS, all-zeros))
// Select defaults to vselect.
multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS, dag MaskingRHS,
                                  SDNode Select = vselect,
                                  string MaskingConstraint = "",
                                  bit IsCommutable = 0,
                                  bit IsKCommutable = 0,
                                  bit IsKZCommutable = IsCommutable> :
  AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
                         AttSrcAsm, IntelSrcAsm,
                         [(set _.RC:$dst, RHS)],
                         [(set _.RC:$dst, MaskingRHS)],
                         [(set _.RC:$dst,
                               (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
                         MaskingConstraint, IsCommutable,
                         IsKCommutable, IsKZCommutable>;

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction.  In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
// This version uses a separate dag for non-masking and masking.
// RHS is used for the unmasked pattern only; MaskRHS is used inside the Select
// of both the merge-masked and the zero-masked pattern.
multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS, dag MaskRHS,
                           bit IsCommutable = 0, bit IsKCommutable = 0,
                           SDNode Select = vselect> :
   AVX512_maskable_custom<O, F, Outs, Ins,
                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
                          [(set _.RC:$dst, RHS)],
                          [(set _.RC:$dst,
                              (Select _.KRCWM:$mask, MaskRHS, _.RC:$src0))],
                          [(set _.RC:$dst,
                              (Select _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))],
                          "$src0 = $dst", IsCommutable, IsKCommutable>;

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction.  In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
// The masked forms prepend ($src0, $mask) resp. ($mask) to Ins, and the
// merge-masked pattern selects between RHS and $src0, which is tied to $dst
// through the "$src0 = $dst" constraint.
multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS,
                           bit IsCommutable = 0, bit IsKCommutable = 0,
                           bit IsKZCommutable = IsCommutable,
                           SDNode Select = vselect> :
   AVX512_maskable_common<O, F, _, Outs, Ins,
                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                          (Select _.KRCWM:$mask, RHS, _.RC:$src0),
                          Select, "$src0 = $dst", IsCommutable, IsKCommutable,
                          IsKZCommutable>;

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the scalar instruction.
// It is AVX512_maskable with all three commutable flags cleared and
// X86selects as the select node.
multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS> :
   AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm,
                   RHS, 0, 0, 0, X86selects>;

// Similar to AVX512_maskable but in this case one of the source operands
// ($src1) is already tied to $dst so we just use that for the preserved
// vector elements.  NOTE that the NonTiedIns (the ins dag) should exclude
// $src1.
// $src1 is prepended to NonTiedIns for every form.  The masked and zero-masked
// forms share the same ins dag ($src1, $mask, NonTiedIns).  When MaskOnly is
// set the unmasked RHS is replaced by (null_frag).  The masking constraint is
// passed as "" here.
multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
                                dag Outs, dag NonTiedIns, string OpcodeStr,
                                string AttSrcAsm, string IntelSrcAsm,
                                dag RHS,
                                bit IsCommutable = 0,
                                bit IsKCommutable = 0,
                                SDNode Select = vselect,
                                bit MaskOnly = 0> :
   AVX512_maskable_common<O, F, _, Outs,
                          !con((ins _.RC:$src1), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
                          !if(MaskOnly, (null_frag), RHS),
                          (Select _.KRCWM:$mask, RHS, _.RC:$src1),
                          Select, "", IsCommutable, IsKCommutable>;

// Similar to AVX512_maskable_3src but in this case the input VT for the tied
// operand differs from the output VT. This requires a bitconvert on
// the preserved vector going into the vselect.
// NOTE: The unmasked pattern is disabled.
// The operands ($src1 and $mask) are typed with InVT; patterns set a
// destination of OutVT.RC.
multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
                                     X86VectorVTInfo InVT,
                                     dag Outs, dag NonTiedIns, string OpcodeStr,
                                     string AttSrcAsm, string IntelSrcAsm,
                                     dag RHS, bit IsCommutable = 0> :
   AVX512_maskable_common<O, F, OutVT, Outs,
                          !con((ins InVT.RC:$src1), NonTiedIns),
                          !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
                          !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, (null_frag),
                          (vselect InVT.KRCWM:$mask, RHS,
                           (bitconvert InVT.RC:$src1)),
                           vselect, "", IsCommutable>;

// Scalar flavour of AVX512_maskable_3src: identical except that X86selects is
// used as the select node.
multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                                       dag Outs, dag NonTiedIns, string OpcodeStr,
                                       string AttSrcAsm, string IntelSrcAsm,
                                       dag RHS,
                                       bit IsCommutable = 0,
                                       bit IsKCommutable = 0,
                                       bit MaskOnly = 0> :
   AVX512_maskable_3src<O, F, _, Outs, NonTiedIns, OpcodeStr, AttSrcAsm,
                        IntelSrcAsm, RHS, IsCommutable, IsKCommutable,
                        X86selects, MaskOnly>;

// Like AVX512_maskable, but only the unmasked form receives a pattern; the
// masked and zero-masked forms are given empty pattern lists.
multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs, dag Ins,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern> :
   AVX512_maskable_custom<O, F, Outs, Ins,
                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
                          "$src0 = $dst">;

// Like AVX512_maskable_3src ($src1 prepended to NonTiedIns, masking constraint
// passed as ""), but only the unmasked form receives a pattern; the masked and
// zero-masked forms are given empty pattern lists.
multiclass AVX512_maskable_3src_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
                                       dag Outs, dag NonTiedIns,
                                       string OpcodeStr,
                                       string AttSrcAsm, string IntelSrcAsm,
                                       list<dag> Pattern> :
   AVX512_maskable_custom<O, F, Outs,
                          !con((ins _.RC:$src1), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
                          "">;

// Instruction with mask that puts result in mask register,
// like "compare" and "vptest"
// Only the unmasked (NAME) and masked (NAME#k) forms are defined; there is no
// zero-masking form.  Both share the same isCommutable setting.
multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
                                  dag Outs,
                                  dag Ins, dag MaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern,
                                  list<dag> MaskingPattern,
                                  bit IsCommutable = 0> {
  let isCommutable = IsCommutable in {
    def NAME: AVX512<O, F, Outs, Ins,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
                                     "$dst, "#IntelSrcAsm#"}",
                       Pattern>;

    def NAME#k: AVX512<O, F, Outs, MaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
                                     "$dst {${mask}}, "#IntelSrcAsm#"}",
                       MaskingPattern>, EVEX_K;
  }
}

// Wraps RHS / MaskingRHS into patterns whose destination is the mask register
// class _.KRC.
multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs,
                                  dag Ins, dag MaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS, dag MaskingRHS,
                                  bit IsCommutable = 0> :
  AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
                         AttSrcAsm, IntelSrcAsm,
                         [(set _.KRC:$dst, RHS)],
                         [(set _.KRC:$dst, MaskingRHS)], IsCommutable>;

// The masked form prepends $mask to Ins and matches (and $mask, RHS_su); the
// unmasked form matches RHS.
multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS, dag RHS_su, bit IsCommutable = 0> :
   AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                          (and _.KRCWM:$mask, RHS_su), IsCommutable>;


// Alias instruction that maps zero vector to pxor / xorp* for AVX-512.
// This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
// swizzled by ExecutionDomainFix to pxor.
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
               [(set VR512:$dst, (v16i32 immAllZerosV))]>;
def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
               [(set VR512:$dst, (v16i32 immAllOnesV))]>;
}

// Alias instructions that allow VPTERNLOG to be used with a mask to create
// a mix of all ones and all zeros elements. This is done this way to force
// the same register to be used as input for all three sources.
// Each pseudo takes only a write-mask and produces a 512-bit vector whose
// elements are all-ones where the mask selects and all-zeros elsewhere
// (32-bit elements with a 16-bit mask, 64-bit elements with an 8-bit mask).
let isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteVecALU] in {
def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst),
                                (ins VK16WM:$mask), "",
                           [(set VR512:$dst, (vselect (v16i1 VK16WM:$mask),
                                                      (v16i32 immAllOnesV),
                                                      (v16i32 immAllZerosV)))]>;
def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
                                (ins VK8WM:$mask), "",
                [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
                                           (v8i64 immAllOnesV),
                                           (v8i64 immAllZerosV)))]>;
}

// 128-bit and 256-bit all-zeros pseudos, using the same flags as the 512-bit
// zero pseudo (rematerializable, as cheap as a move, foldable as a load).
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
               [(set VR128X:$dst, (v4i32 immAllZerosV))]>;
def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
               [(set VR256X:$dst, (v8i32 immAllZerosV))]>;
}

// Alias instructions that map fld0 to xorps for sse or vxorps for avx.
// This is expanded by ExpandPostRAPseudos.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in {
  def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
                          [(set FR32X:$dst, fp32imm0)]>;
  def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
                          [(set FR64X:$dst, fpimm0)]>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - VECTOR INSERT
//

// Supports two different pattern operators for mask and unmasked ops. Allows
// null_frag to be passed for one.
// Defines the register (rr) and memory (rm) forms of a subvector insert of
// From into To, each with unmasked, masked and zero-masked variants via
// AVX512_maskable_split.
//   vinsert_insert   - operator used in the unmasked pattern.
//   vinsert_for_mask - operator used in the masked / zero-masked patterns.
// The mnemonic is "vinsert" # <From element type> # "x" # <From NumElts>.
multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From,
                                  X86VectorVTInfo To,
                                  SDPatternOperator vinsert_insert,
                                  SDPatternOperator vinsert_for_mask,
                                  X86FoldableSchedWrite sched> {
  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    defm rr : AVX512_maskable_split<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
                   (ins To.RC:$src1, From.RC:$src2, u8imm:$src3),
                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
                   "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
                                         (From.VT From.RC:$src2),
                                         (iPTR imm)),
                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
                                           (From.VT From.RC:$src2),
                                           (iPTR imm))>,
                   AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
    // mayLoad is set explicitly on the memory form.
    let mayLoad = 1 in
    defm rm : AVX512_maskable_split<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
                   (ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3),
                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
                   "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
                               (From.VT (From.LdFrag addr:$src2)),
                               (iPTR imm)),
                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
                               (From.VT (From.LdFrag addr:$src2)),
                               (iPTR imm))>, AVX512AIi8Base, EVEX_4V,
                   EVEX_CD8<From.EltSize, From.CD8TupleForm>,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Passes the same pattern operator for masked and unmasked ops.
multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From,
                            X86VectorVTInfo To,
                            SDPatternOperator vinsert_insert,
                            X86FoldableSchedWrite sched> :
  vinsert_for_size_split<Opcode, From, To, vinsert_insert, vinsert_insert, sched>;

// Adds unmasked selection patterns (register and load source) that map a
// vinsert_insert of From into To onto the existing instructions named
// InstrStr#"rr" / InstrStr#"rm".  The immediate operand is produced by
// applying INSERT_get_vinsert_imm to the matched insert node ($ins).
// The patterns are guarded by the predicate list p.
multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                       X86VectorVTInfo To, PatFrag vinsert_insert,
                       SDNodeXForm INSERT_get_vinsert_imm , list<Predicate> p> {
  let Predicates = p in {
    def : Pat<(vinsert_insert:$ins
                     (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rr")
                     To.RC:$src1, From.RC:$src2,
                     (INSERT_get_vinsert_imm To.RC:$ins)))>;

    def : Pat<(vinsert_insert:$ins
                  (To.VT To.RC:$src1),
                  (From.VT (From.LdFrag addr:$src2)),
                  (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rm")
                  To.RC:$src1, addr:$src2,
                  (INSERT_get_vinsert_imm To.RC:$ins)))>;
  }
}

// Instantiates every subvector-insert instruction for one 32-bit/64-bit
// element type pair:
//   NAME#32x4Z256, NAME#32x4Z, NAME#64x4Z     - patterns for all variants.
//   NAME#64x2Z256, NAME#64x2Z, NAME#32x8Z     - null_frag is passed as the
//                                               unmasked operator, so only the
//                                               masked variants get patterns.
// Opcode128 is used for the 128-bit inserts and Opcode256 for the 256-bit
// inserts.
multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
                            ValueType EltVT64, int Opcode256,
                            X86FoldableSchedWrite sched> {

  let Predicates = [HasVLX] in
    defm NAME # "32x4Z256" : vinsert_for_size<Opcode128,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 vinsert128_insert, sched>, EVEX_V256;

  defm NAME # "32x4Z" : vinsert_for_size<Opcode128,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 vinsert128_insert, sched>, EVEX_V512;

  defm NAME # "64x4Z" : vinsert_for_size<Opcode256,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 vinsert256_insert, sched>, VEX_W, EVEX_V512;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasVLX, HasDQI] in
    defm NAME # "64x2Z256" : vinsert_for_size_split<Opcode128,
                                   X86VectorVTInfo< 2, EltVT64, VR128X>,
                                   X86VectorVTInfo< 4, EltVT64, VR256X>,
                                   null_frag, vinsert128_insert, sched>,
                                   VEX_W1X, EVEX_V256;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z" : vinsert_for_size_split<Opcode128,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 null_frag, vinsert128_insert, sched>,
                                 VEX_W, EVEX_V512;

    defm NAME # "32x8Z" : vinsert_for_size_split<Opcode256,
                                   X86VectorVTInfo< 8, EltVT32, VR256X>,
                                   X86VectorVTInfo<16, EltVT32, VR512>,
                                   null_frag, vinsert256_insert, sched>,
                                   EVEX_V512;
  }
}

// FIXME: Is there a better scheduler class for VINSERTF/VINSERTI?
defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a, WriteFShuffle256>;
defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a, WriteShuffle256>;

// Codegen pattern with the alternative types,
// Even with AVX512DQ we'll still use these for unmasked operations.
defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;

defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;

defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;

// Codegen pattern with the alternative types insert VEC128 into VEC256
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
// Codegen pattern with the alternative types insert VEC128 into VEC512
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
               vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
// Codegen pattern with the alternative types insert VEC256 into VEC512
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;


// Masked subvector-insert patterns for the case where the insert is performed
// in one vector type (From into To) but the mask applies to a different type
// (Cast) of the same width, so a bitconvert sits between the insert and the
// vselect.
//   InstrStr - base name of the instruction; "rrk", "rmk", "rrkz" and "rmkz"
//              are appended to pick the masked register / memory forms.
//   From/To  - types of the inserted subvector and of the vector inserted into.
//   Cast     - type the vselect (and therefore the write-mask) operates on.
//   INSERT_get_vinsert_imm - transform producing the immediate from the
//              matched insert node ($ins).
//   p        - predicates guarding the patterns.
multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                 X86VectorVTInfo To, X86VectorVTInfo Cast,
                                 PatFrag vinsert_insert,
                                 SDNodeXForm INSERT_get_vinsert_imm,
                                 list<Predicate> p> {
let Predicates = p in {
  // Merge-masking, register source.
  def : Pat<(Cast.VT
             (vselect Cast.KRCWM:$mask,
                      (bitconvert
                       (vinsert_insert:$ins (To.VT To.RC:$src1),
                                            (From.VT From.RC:$src2),
                                            (iPTR imm))),
                      Cast.RC:$src0)),
            (!cast<Instruction>(InstrStr#"rrk")
             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
  // Merge-masking, load source.  The load is matched directly as From.VT,
  // the same way the zero-masking load pattern below and
  // vinsert_for_size_lowering match it; an extra bitconvert around the load
  // would keep this pattern from matching that form.
  def : Pat<(Cast.VT
             (vselect Cast.KRCWM:$mask,
                      (bitconvert
                       (vinsert_insert:$ins (To.VT To.RC:$src1),
                                            (From.VT (From.LdFrag addr:$src2)),
                                            (iPTR imm))),
                      Cast.RC:$src0)),
            (!cast<Instruction>(InstrStr#"rmk")
             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;

  // Zero-masking, register source.
  def : Pat<(Cast.VT
             (vselect Cast.KRCWM:$mask,
                      (bitconvert
                       (vinsert_insert:$ins (To.VT To.RC:$src1),
                                            (From.VT From.RC:$src2),
                                            (iPTR imm))),
                      Cast.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#"rrkz")
             Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
  // Zero-masking, load source.
  def : Pat<(Cast.VT
             (vselect Cast.KRCWM:$mask,
                      (bitconvert
                       (vinsert_insert:$ins (To.VT To.RC:$src1),
                                            (From.VT (From.LdFrag addr:$src2)),
                                            (iPTR imm))),
                      Cast.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#"rmkz")
             Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
}
}

// 128-bit into 256-bit, floating point.
defm : vinsert_for_mask_cast<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
                             v8f32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4f32x_info, v8f32x_info,
                             v4f64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;

// 128-bit into 256-bit, integer.
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
// Integer vectors masked as v4i64 use the integer VINSERTI64x2Z256 form, in
// line with the 512-bit integer instantiations below (VINSERTI64x2Z).
defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v4i32x_info, v8i32x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v8i16x_info, v16i16x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v16i8x_info, v32i8x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;

// 128-bit into 512-bit, floating point.
defm : vinsert_for_mask_cast<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
                             v16f32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z", v4f32x_info, v16f32_info,
                             v8f64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;

// 128-bit into 512-bit, integer.
defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v4i32x_info, v16i32_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v8i16x_info, v32i16_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v16i8x_info, v64i8_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;

// 256-bit into 512-bit, floating point.
defm : vinsert_for_mask_cast<"VINSERTF32x8Z", v4f64x_info, v8f64_info,
                             v16f32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
                             v8f64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;

// 256-bit into 512-bit, integer.
defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v4i64x_info, v8i64_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v16i16x_info, v32i16_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v32i8x_info, v64i8_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;

// vinsertps - insert f32 to XMM
let ExeDomain = SSEPackedSingle in {
// Only the register form is marked commutable.
let isCommutable = 1 in
def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
      (ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, imm:$src3))]>,
      EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
// Memory form: the second source is a scalar f32 load placed into a vector
// with scalar_to_vector.
def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
      (ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst, (X86insertps VR128X:$src1,
                          (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
                          imm:$src3))]>,
      EVEX_4V, EVEX_CD8<32, CD8VT1>,
      Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
}

//===----------------------------------------------------------------------===//
// AVX-512 VECTOR EXTRACT
//---

// Supports two different pattern operators for mask and unmasked ops. Allows
// null_frag to be passed for one.
// Defines:
//   rr (+ rrk/rrkz via AVX512_maskable_split) - register destination.
//   mr  - store to memory, pattern built from vextract_extract.
//   mrk - masked store to memory, no pattern.
// The mnemonic is "vextract" # <To element type> # "x" # <To NumElts>.
multiclass vextract_for_size_split<int Opcode,
                                   X86VectorVTInfo From, X86VectorVTInfo To,
                                   SDPatternOperator vextract_extract,
                                   SDPatternOperator vextract_for_mask,
                                   SchedWrite SchedRR, SchedWrite SchedMR> {

  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    defm rr : AVX512_maskable_split<Opcode, MRMDestReg, To, (outs To.RC:$dst),
                (ins From.RC:$src1, u8imm:$idx),
                "vextract" # To.EltTypeName # "x" # To.NumElts,
                "$idx, $src1", "$src1, $idx",
                (vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm)),
                (vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm))>,
                AVX512AIi8Base, EVEX, Sched<[SchedRR]>;

    // NOTE(review): unlike mrk below, this def has no explicit mayStore; when
    // vextract_extract is null_frag the store pattern is presumably dropped -
    // confirm the flag is still set correctly for those instantiations.
    def mr  : AVX512AIi8<Opcode, MRMDestMem, (outs),
                    (ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx),
                    "vextract" # To.EltTypeName # "x" # To.NumElts #
                        "\t{$idx, $src1, $dst|$dst, $src1, $idx}",
                    [(store (To.VT (vextract_extract:$idx
                                    (From.VT From.RC:$src1), (iPTR imm))),
                             addr:$dst)]>, EVEX,
                    Sched<[SchedMR]>;

    // No pattern is attached, so mayStore is stated explicitly.
    let mayStore = 1, hasSideEffects = 0 in
    def mrk : AVX512AIi8<Opcode, MRMDestMem, (outs),
                    (ins To.MemOp:$dst, To.KRCWM:$mask,
                                        From.RC:$src1, u8imm:$idx),
                     "vextract" # To.EltTypeName # "x" # To.NumElts #
                          "\t{$idx, $src1, $dst {${mask}}|"
                          "$dst {${mask}}, $src1, $idx}", []>,
                    EVEX_K, EVEX, Sched<[SchedMR]>, NotMemoryFoldable;
  }
}

// Passes the same pattern operator for masked and unmasked ops.
// Convenience wrapper around vextract_for_size_split for the common case in
// which a single pattern operator selects both the unmasked and the masked
// instruction forms.
multiclass vextract_for_size<int Opcode, X86VectorVTInfo From,
                             X86VectorVTInfo To,
                             SDPatternOperator vextract_extract,
                             SchedWrite SchedRR, SchedWrite SchedMR>
    : vextract_for_size_split<Opcode, From, To,
                              vextract_extract, vextract_extract,
                              SchedRR, SchedMR>;

// Codegen patterns for the alternative types.
//
// InstrStr names an existing instruction family; its "rr" and "mr" records are
// looked up by name. EXTRACT_get_vextract_imm is applied to the matched
// extract node ($ext) to produce the immediate operand of the selected
// instruction. Both patterns are guarded by the predicate list p.
multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                                      X86VectorVTInfo To,
                                      PatFrag vextract_extract,
                                      SDNodeXForm EXTRACT_get_vextract_imm,
                                      list<Predicate> p> {
  let Predicates = p in {
    // Register-destination form.
    def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rr")
                      From.RC:$src1,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;

    // Extract feeding a store: select the memory-destination form.
    def : Pat<(store (To.VT (vextract_extract:$ext (From.VT From.RC:$src1),
                                                   (iPTR imm))),
                     addr:$dst),
              (!cast<Instruction>(InstrStr#"mr")
               addr:$dst, From.RC:$src1,
               (EXTRACT_get_vextract_imm To.RC:$ext))>;
  }
}

// Instantiates the whole vextract family for one element flavour.
//   EltVT32 / EltVT64 - the 32-bit and 64-bit element types.
//   Opcode128         - opcode of the 128-bit extracts (32x4 and 64x2 forms).
//   Opcode256         - opcode of the 256-bit extracts (64x4 and 32x8 forms).
//   SchedRR / SchedMR - scheduling classes for the register and store forms.
multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
                             ValueType EltVT64, int Opcode256,
                             SchedWrite SchedRR, SchedWrite SchedMR> {
  let Predicates = [HasAVX512] in {
    // 512 -> 128, 32-bit elements.
    defm NAME # "32x4Z"
        : vextract_for_size<Opcode128,
                            X86VectorVTInfo<16, EltVT32, VR512>,
                            X86VectorVTInfo< 4, EltVT32, VR128X>,
                            vextract128_extract, SchedRR, SchedMR>,
          EVEX_V512, EVEX_CD8<32, CD8VT4>;
    // 512 -> 256, 64-bit elements.
    defm NAME # "64x4Z"
        : vextract_for_size<Opcode256,
                            X86VectorVTInfo< 8, EltVT64, VR512>,
                            X86VectorVTInfo< 4, EltVT64, VR256X>,
                            vextract256_extract, SchedRR, SchedMR>,
          VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
  }

  let Predicates = [HasVLX] in {
    // 256 -> 128, 32-bit elements.
    defm NAME # "32x4Z256"
        : vextract_for_size<Opcode128,
                            X86VectorVTInfo< 8, EltVT32, VR256X>,
                            X86VectorVTInfo< 4, EltVT32, VR128X>,
                            vextract128_extract, SchedRR, SchedMR>,
          EVEX_V256, EVEX_CD8<32, CD8VT4>;
  }

  // Even with DQI we'd like to only use these instructions for masking, so
  // null_frag is passed as the unmasked operator.
  let Predicates = [HasVLX, HasDQI] in {
    // 256 -> 128, 64-bit elements.
    defm NAME # "64x2Z256"
        : vextract_for_size_split<Opcode128,
                                  X86VectorVTInfo< 4, EltVT64, VR256X>,
                                  X86VectorVTInfo< 2, EltVT64, VR128X>,
                                  null_frag, vextract128_extract,
                                  SchedRR, SchedMR>,
          VEX_W1X, EVEX_V256, EVEX_CD8<64, CD8VT2>;
  }

  // Even with DQI we'd like to only use these instructions for masking, so
  // null_frag is passed as the unmasked operator.
  let Predicates = [HasDQI] in {
    // 512 -> 128, 64-bit elements.
    defm NAME # "64x2Z"
        : vextract_for_size_split<Opcode128,
                                  X86VectorVTInfo< 8, EltVT64, VR512>,
                                  X86VectorVTInfo< 2, EltVT64, VR128X>,
                                  null_frag, vextract128_extract,
                                  SchedRR, SchedMR>,
          VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
    // 512 -> 256, 32-bit elements.
    defm NAME # "32x8Z"
        : vextract_for_size_split<Opcode256,
                                  X86VectorVTInfo<16, EltVT32, VR512>,
                                  X86VectorVTInfo< 8, EltVT32, VR256X>,
                                  null_frag, vextract256_extract,
                                  SchedRR, SchedMR>,
          EVEX_V512, EVEX_CD8<32, CD8VT8>;
  }
}

// TODO - replace WriteFStore/WriteVecStore with X86SchedWriteMoveLSWidths types.
defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b,
                                   WriteFShuffle256, WriteFStore>;
defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b,
                                   WriteShuffle256, WriteVecStore>;

// extract_subvector codegen patterns with the alternative types.
// Even with AVX512DQ we'll still use these for unmasked operations.
// 64-bit element types, VEC128 from VEC512: reuse the 32x4 instructions.
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z",
                                  v8f64_info, v2f64x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z",
                                  v8i64_info, v2i64x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasAVX512]>;

// 32-bit element types, VEC256 from VEC512: reuse the 64x4 instructions.
defm : vextract_for_size_lowering<"VEXTRACTF64x4Z",
                                  v16f32_info, v8f32x_info,
                                  vextract256_extract,
                                  EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z",
                                  v16i32_info, v8i32x_info,
                                  vextract256_extract,
                                  EXTRACT_get_vextract256_imm, [HasAVX512]>;

// 64-bit element types, VEC128 from VEC256: reuse the 32x4 Z256 instructions.
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256",
                                  v4f64x_info, v2f64x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256",
                                  v4i64x_info, v2i64x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasVLX]>;

// Codegen pattern with the alternative types extract VEC128 from VEC256
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256",
                                  v16i16x_info, v8i16x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256",
                                  v32i8x_info, v16i8x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasVLX]>;

// Codegen pattern with the alternative types extract VEC128 from VEC512
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z",
                                  v32i16_info, v8i16x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z",
                                  v64i8_info, v16i8x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasAVX512]>;
// Codegen pattern with the alternative types extract VEC256 from VEC512
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z",
                                  v32i16_info, v16i16x_info,
                                  vextract256_extract,
                                  EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z",
                                  v64i8_info, v32i8x_info,
                                  vextract256_extract,
                                  EXTRACT_get_vextract256_imm, [HasAVX512]>;


// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
// Without VLX this is selected as VEXTRACTI128rr / VEXTRACTF128rr with
// immediate 1, applied to the sub_ymm subregister of the 512-bit source.
let Predicates = [NoVLX] in {
  def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
            (v2i64 (VEXTRACTI128rr
                    (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                    (iPTR 1)))>;
  def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
            (v2f64 (VEXTRACTF128rr
                    (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                    (iPTR 1)))>;
  def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
            (v4i32 (VEXTRACTI128rr
                    (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                    (iPTR 1)))>;
  def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
            (v4f32 (VEXTRACTF128rr
                    (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                    (iPTR 1)))>;
  def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
            (v8i16 (VEXTRACTI128rr
                    (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                    (iPTR 1)))>;
  def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
            (v16i8 (VEXTRACTI128rr
                    (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                    (iPTR 1)))>;
}

// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
// With VLX the extract of bits [255:128] is selected as the 256-bit EVEX form
// (VEXTRACTI32x4Z256rr / VEXTRACTF32x4Z256rr) with immediate 1, applied to the
// sub_ymm subregister of the 512-bit source.
let Predicates = [HasVLX] in {
  def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
            (v2i64 (VEXTRACTI32x4Z256rr
                    (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                    (iPTR 1)))>;
  def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
            (v2f64 (VEXTRACTF32x4Z256rr
                    (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                    (iPTR 1)))>;
  def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
            (v4i32 (VEXTRACTI32x4Z256rr
                    (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                    (iPTR 1)))>;
  def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
            (v4f32 (VEXTRACTF32x4Z256rr
                    (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                    (iPTR 1)))>;
  def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
            (v8i16 (VEXTRACTI32x4Z256rr
                    (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                    (iPTR 1)))>;
  def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
            (v16i8 (VEXTRACTI32x4Z256rr
                    (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                    (iPTR 1)))>;
}


// Additional patterns for handling a bitcast between the vselect and the
// extract_subvector.
// Masked-extract patterns where the vselect operates on a different element
// type (Cast) than the extract_subvector result (To), with a bitconvert in
// between.
//   InstrStr                 - instruction family; "rrk" (merge-masking) and
//                              "rrkz" (zero-masking) records are looked up by
//                              name.
//   From / To                - source and result vector infos of the extract.
//   Cast                     - vector info of the vselect; supplies the mask
//                              register class and the pass-through operand.
//   vextract_extract         - extract pattern fragment to match.
//   EXTRACT_get_vextract_imm - transform applied to the matched extract ($ext)
//                              to produce the immediate operand.
//   p                        - predicates guarding both patterns.
multiclass vextract_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                  X86VectorVTInfo To, X86VectorVTInfo Cast,
                                  PatFrag vextract_extract,
                                  SDNodeXForm EXTRACT_get_vextract_imm,
                                  list<Predicate> p> {
let Predicates = p in {
  // Merge-masking. The pass-through value has the vselect's type (Cast.VT),
  // so it is typed with Cast.RC on both the match and the output side.
  def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
                              (bitconvert
                               (To.VT (vextract_extract:$ext
                                       (From.VT From.RC:$src), (iPTR imm)))),
                              Cast.RC:$src0)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
                      Cast.RC:$src0, Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;

  // Zero-masking.
  def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
                              (bitconvert
                               (To.VT (vextract_extract:$ext
                                       (From.VT From.RC:$src), (iPTR imm)))),
                              Cast.ImmAllZerosV)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
                      Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;
}
}

// VEC128 from VEC256, floating point.
defm : vextract_for_mask_cast<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
                              v4f32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z256", v8f32x_info, v4f32x_info,
                              v2f64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;

// VEC128 from VEC256, integer.
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v8i32x_info, v4i32x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v16i16x_info, v8i16x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v32i8x_info, v16i8x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;

// VEC128 from VEC512, floating point.
defm : vextract_for_mask_cast<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
                              v4f32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z", v16f32_info, v4f32x_info,
                              v2f64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;

// VEC128 from VEC512, integer.
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v16i32_info, v4i32x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v32i16_info, v8i16x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v64i8_info, v16i8x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;

// VEC256 from VEC512, floating point.
defm : vextract_for_mask_cast<"VEXTRACTF32x8Z", v8f64_info, v4f64x_info,
                              v8f32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
                              v4f64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;

// VEC256 from VEC512, integer.
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v8i64_info, v4i64x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v32i16_info, v16i16x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v64i8_info, v32i8x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;

// vextractps - extract 32 bits from XMM
// Register-destination form: the selected element is written to a GR32.
def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
      (ins VR128X:$src1, u8imm:$src2),
      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      [(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
      EVEX, VEX_WIG, Sched<[WriteVecExtract]>;

// Memory-destination form: the selected element is stored to $dst.
def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
      (ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
              addr:$dst)]>,
      EVEX, VEX_WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecExtractSt]>;

//===---------------------------------------------------------------------===//
// AVX-512 BROADCAST
//---
// broadcast with a scalar argument.
// Patterns that broadcast a scalar held in an FP scalar register class
// (SrcInfo.FRC). The scalar is first copied into SrcInfo.RC with
// COPY_TO_REGCLASS and then fed to the register-source broadcast instruction
// named Name # DestInfo.ZSuffix # {"r", "rk", "rkz"}.
//   Name     - base name of the broadcast instruction family.
//   DestInfo - vector info of the broadcast result.
//   SrcInfo  - vector info describing the scalar source.
// opc and OpcodeStr are not referenced by these patterns; NOTE(review): they
// are presumably kept so the signature parallels avx512_broadcast_rm.
multiclass avx512_broadcast_scalar<bits<8> opc, string OpcodeStr,
                                   string Name,
                                   X86VectorVTInfo DestInfo,
                                   X86VectorVTInfo SrcInfo> {
  // Unmasked.
  def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#"r")
             (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;

  // Merge-masking: unselected lanes come from $src0.
  def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
                                  (X86VBroadcast SrcInfo.FRC:$src),
                                  DestInfo.RC:$src0)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#"rk")
             DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
             (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;

  // Zero-masking: unselected lanes are zero.
  def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
                                  (X86VBroadcast SrcInfo.FRC:$src),
                                  DestInfo.ImmAllZerosV)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#"rkz")
             DestInfo.KRCWM:$mask,
             (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
}

// Split version to allow mask and broadcast node to be different types. This
// helps support the 32x2 broadcasts.
// Register- and memory-source broadcast instructions, with the mask type
// (MaskInfo) allowed to differ from the broadcast node type (DestInfo); the
// two are connected through a bitconvert.
//   Name              - base name used to look up the generated instructions.
//   SchedRR / SchedRM - scheduling classes of the "r" and "m" forms.
//   MaskInfo          - vector info of the instruction result and write-mask.
//   DestInfo          - vector info of the broadcast node.
//   SrcInfo           - vector info of the source operand.
//   UnmaskedOp        - operator for the unmasked patterns (X86VBroadcast by
//                       default). The masked patterns always use
//                       X86VBroadcast.
multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
                                     string Name,
                                     SchedWrite SchedRR, SchedWrite SchedRM,
                                     X86VectorVTInfo MaskInfo,
                                     X86VectorVTInfo DestInfo,
                                     X86VectorVTInfo SrcInfo,
                                     SDPatternOperator UnmaskedOp = X86VBroadcast> {
  let ExeDomain = DestInfo.ExeDomain, hasSideEffects = 0 in {
  // Register source.
  defm r : AVX512_maskable_split<opc, MRMSrcReg, MaskInfo,
                   (outs MaskInfo.RC:$dst),
                   (ins SrcInfo.RC:$src), OpcodeStr, "$src", "$src",
                   (MaskInfo.VT
                    (bitconvert
                     (DestInfo.VT
                      (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))),
                   (MaskInfo.VT
                    (bitconvert
                     (DestInfo.VT
                      (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src)))))>,
                   T8PD, EVEX, Sched<[SchedRR]>;
  // Scalar memory source.
  let mayLoad = 1 in
  defm m : AVX512_maskable_split<opc, MRMSrcMem, MaskInfo,
                   (outs MaskInfo.RC:$dst),
                   (ins SrcInfo.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
                   (MaskInfo.VT
                    (bitconvert
                     (DestInfo.VT (UnmaskedOp
                                   (SrcInfo.ScalarLdFrag addr:$src))))),
                   (MaskInfo.VT
                    (bitconvert
                     (DestInfo.VT (X86VBroadcast
                                   (SrcInfo.ScalarLdFrag addr:$src)))))>,
                   T8PD, EVEX, EVEX_CD8<SrcInfo.EltSize, CD8VT1>,
                   Sched<[SchedRM]>;
  }

  // Extra patterns for a scalar load wrapped in scalar_to_vector. All three
  // look the instruction up through MaskInfo.ZSuffix, since the instruction
  // records above are defined in terms of MaskInfo.
  def : Pat<(MaskInfo.VT
             (bitconvert
              (DestInfo.VT (UnmaskedOp
                            (SrcInfo.VT (scalar_to_vector
                                         (SrcInfo.ScalarLdFrag addr:$src))))))),
            (!cast<Instruction>(Name#MaskInfo.ZSuffix#"m") addr:$src)>;
  def : Pat<(MaskInfo.VT (vselect MaskInfo.KRCWM:$mask,
                          (bitconvert
                           (DestInfo.VT
                            (X86VBroadcast
                             (SrcInfo.VT (scalar_to_vector
                                          (SrcInfo.ScalarLdFrag addr:$src)))))),
                          MaskInfo.RC:$src0)),
            (!cast<Instruction>(Name#MaskInfo.ZSuffix#"mk")
             MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask, addr:$src)>;
  def : Pat<(MaskInfo.VT (vselect MaskInfo.KRCWM:$mask,
                          (bitconvert
                           (DestInfo.VT
                            (X86VBroadcast
                             (SrcInfo.VT (scalar_to_vector
                                          (SrcInfo.ScalarLdFrag addr:$src)))))),
                          MaskInfo.ImmAllZerosV)),
            (!cast<Instruction>(Name#MaskInfo.ZSuffix#"mkz")
             MaskInfo.KRCWM:$mask, addr:$src)>;
}

// Helper multiclass to force mask and broadcast result to same type.
multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr, string Name,
                               SchedWrite SchedRR, SchedWrite SchedRM,
                               X86VectorVTInfo DestInfo,
                               X86VectorVTInfo SrcInfo> :
  avx512_broadcast_rm_split<opc, OpcodeStr, Name, SchedRR, SchedRM,
                            DestInfo, DestInfo, SrcInfo>;

// Scalar-double FP broadcast: 512-bit (HasAVX512) and 256-bit (HasVLX) forms.
// No 128-bit form is instantiated here.
multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                  WriteFShuffle256Ld, _.info512, _.info128>,
              avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512,
                                      _.info128>,
              EVEX_V512;
  }

  let Predicates = [HasVLX] in {
    defm Z256  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                     WriteFShuffle256Ld, _.info256, _.info128>,
                 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256,
                                         _.info128>,
                 EVEX_V256;
  }
}

// Scalar-single FP broadcast: 512-bit (HasAVX512), 256-bit and 128-bit
// (HasVLX) forms.
multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                  WriteFShuffle256Ld, _.info512, _.info128>,
              avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512,
                                      _.info128>,
              EVEX_V512;
  }

  let Predicates = [HasVLX] in {
    defm Z256  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                     WriteFShuffle256Ld, _.info256, _.info128>,
                 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256,
                                         _.info128>,
                 EVEX_V256;
    defm Z128  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                     WriteFShuffle256Ld, _.info128, _.info128>,
                 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info128,
                                         _.info128>,
                 EVEX_V128;
  }
}
defm VBROADCASTSS : avx512_fp_broadcast_ss<0x18, "vbroadcastss",
                                           avx512vl_f32_info>;
defm VBROADCASTSD : avx512_fp_broadcast_sd<0x19, "vbroadcastsd",
                                           avx512vl_f64_info>, VEX_W1X;

// Integer broadcast from a general-purpose register of class SrcRC; the
// selection pattern is attached directly to the instruction.
multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR,
                                    X86VectorVTInfo _, SDPatternOperator OpNode,
                                    RegisterClass SrcRC> {
  let ExeDomain = _.ExeDomain in
  defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins SrcRC:$src),
                         "vpbroadcast"##_.Suffix, "$src", "$src",
                         (_.VT (OpNode SrcRC:$src))>, T8PD, EVEX,
                         Sched<[SchedRR]>;
}

// Integer broadcast whose instruction takes a GR32 source while the matched
// node takes SrcRC. The instruction is defined with empty pattern lists; the
// explicit patterns below place the SrcRC value into an undefined i32 at
// subregister index Subreg (INSERT_SUBREG) before handing it over.
//   Name - full name of this instruction family (without the r/rk/rkz suffix).
multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite SchedRR,
                                    X86VectorVTInfo _, SDPatternOperator OpNode,
                                    RegisterClass SrcRC, SubRegIndex Subreg> {
  let hasSideEffects = 0, ExeDomain = _.ExeDomain in
  defm r : AVX512_maskable_custom<opc, MRMSrcReg,
                        (outs _.RC:$dst), (ins GR32:$src),
                        !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
                        !con((ins _.KRCWM:$mask), (ins GR32:$src)),
                        "vpbroadcast"##_.Suffix, "$src", "$src", [], [], [],
                        "$src0 = $dst">, T8PD, EVEX, Sched<[SchedRR]>;

  // Unmasked.
  def : Pat <(_.VT (OpNode SrcRC:$src)),
             (!cast<Instruction>(Name#"r")
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;

  // Merge-masking.
  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
             (!cast<Instruction>(Name#"rk") _.RC:$src0, _.KRCWM:$mask,
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;

  // Zero-masking.
  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV),
             (!cast<Instruction>(Name#"rkz") _.KRCWM:$mask,
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
}

// 512/256/128-bit instantiations of avx512_int_broadcastbw_reg. The 512-bit
// form requires prd; the 256- and 128-bit forms additionally require HasVLX.
multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name,
                      AVX512VLVectorVTInfo _, SDPatternOperator OpNode,
                      RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_int_broadcastbw_reg<opc, Name#"Z", WriteShuffle256,
              _.info512, OpNode, SrcRC, Subreg>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_int_broadcastbw_reg<opc, Name#"Z256", WriteShuffle256,
              _.info256, OpNode, SrcRC, Subreg>, EVEX_V256;
    defm Z128 : avx512_int_broadcastbw_reg<opc, Name#"Z128", WriteShuffle,
              _.info128, OpNode, SrcRC, Subreg>, EVEX_V128;
  }
}

// 512/256/128-bit instantiations of avx512_int_broadcast_reg. The 512-bit
// form requires prd; the 256- and 128-bit forms additionally require HasVLX.
multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
                                       SDPatternOperator OpNode,
                                       RegisterClass SrcRC, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info512, OpNode,
                                      SrcRC>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info256, OpNode,
                                         SrcRC>, EVEX_V256;
    defm Z128 : avx512_int_broadcast_reg<opc, WriteShuffle, _.info128, OpNode,
                                         SrcRC>, EVEX_V128;
  }
}

defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr",
                       avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>;
defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr",
                       avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit,
                       HasBWI>;
defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
                                                 X86VBroadcast, GR32, HasAVX512>;
defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
                                                 X86VBroadcast, GR64, HasAVX512>, VEX_W;

// Provide aliases for broadcast from the same register class that
// automatically does the extract.
1289multiclass avx512_int_broadcast_rm_lowering<string Name, 1290 X86VectorVTInfo DestInfo, 1291 X86VectorVTInfo SrcInfo, 1292 X86VectorVTInfo ExtInfo> { 1293 def : Pat<(DestInfo.VT (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))), 1294 (!cast<Instruction>(Name#DestInfo.ZSuffix#"r") 1295 (ExtInfo.VT (EXTRACT_SUBREG (SrcInfo.VT SrcInfo.RC:$src), sub_xmm)))>; 1296} 1297 1298multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr, 1299 AVX512VLVectorVTInfo _, Predicate prd> { 1300 let Predicates = [prd] in { 1301 defm Z : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256, 1302 WriteShuffle256Ld, _.info512, _.info128>, 1303 avx512_int_broadcast_rm_lowering<NAME, _.info512, _.info256, _.info128>, 1304 EVEX_V512; 1305 // Defined separately to avoid redefinition. 1306 defm Z_Alt : avx512_int_broadcast_rm_lowering<NAME, _.info512, _.info512, _.info128>; 1307 } 1308 let Predicates = [prd, HasVLX] in { 1309 defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256, 1310 WriteShuffle256Ld, _.info256, _.info128>, 1311 avx512_int_broadcast_rm_lowering<NAME, _.info256, _.info256, _.info128>, 1312 EVEX_V256; 1313 defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle, 1314 WriteShuffleXLd, _.info128, _.info128>, 1315 EVEX_V128; 1316 } 1317} 1318 1319defm VPBROADCASTB : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb", 1320 avx512vl_i8_info, HasBWI>; 1321defm VPBROADCASTW : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw", 1322 avx512vl_i16_info, HasBWI>; 1323defm VPBROADCASTD : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd", 1324 avx512vl_i32_info, HasAVX512>; 1325defm VPBROADCASTQ : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq", 1326 avx512vl_i64_info, HasAVX512>, VEX_W1X; 1327 1328multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr, 1329 X86VectorVTInfo _Dst, X86VectorVTInfo _Src> { 1330 defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst), 1331 (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src", 1332 
(_Dst.VT (X86SubVBroadcast 1333 (_Src.VT (_Src.LdFrag addr:$src))))>, 1334 Sched<[SchedWriteShuffle.YMM.Folded]>, 1335 AVX5128IBase, EVEX; 1336} 1337 1338// This should be used for the AVX512DQ broadcast instructions. It disables 1339// the unmasked patterns so that we only use the DQ instructions when masking 1340// is requested. 1341multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr, 1342 X86VectorVTInfo _Dst, X86VectorVTInfo _Src> { 1343 let hasSideEffects = 0, mayLoad = 1 in 1344 defm rm : AVX512_maskable_split<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst), 1345 (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src", 1346 (null_frag), 1347 (_Dst.VT (X86SubVBroadcast 1348 (_Src.VT (_Src.LdFrag addr:$src))))>, 1349 Sched<[SchedWriteShuffle.YMM.Folded]>, 1350 AVX5128IBase, EVEX; 1351} 1352 1353let Predicates = [HasAVX512] in { 1354 // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD. 1355 def : Pat<(v8i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))), 1356 (VPBROADCASTQZm addr:$src)>; 1357} 1358 1359let Predicates = [HasVLX] in { 1360 // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD. 1361 def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))), 1362 (VPBROADCASTQZ128m addr:$src)>; 1363 def : Pat<(v4i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))), 1364 (VPBROADCASTQZ256m addr:$src)>; 1365} 1366let Predicates = [HasVLX, HasBWI] in { 1367 // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably. 1368 // This means we'll encounter truncated i32 loads; match that here. 
1369 def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))), 1370 (VPBROADCASTWZ128m addr:$src)>; 1371 def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))), 1372 (VPBROADCASTWZ256m addr:$src)>; 1373 def : Pat<(v8i16 (X86VBroadcast 1374 (i16 (trunc (i32 (extloadi16 addr:$src)))))), 1375 (VPBROADCASTWZ128m addr:$src)>; 1376 def : Pat<(v8i16 (X86VBroadcast 1377 (i16 (trunc (i32 (zextloadi16 addr:$src)))))), 1378 (VPBROADCASTWZ128m addr:$src)>; 1379 def : Pat<(v16i16 (X86VBroadcast 1380 (i16 (trunc (i32 (extloadi16 addr:$src)))))), 1381 (VPBROADCASTWZ256m addr:$src)>; 1382 def : Pat<(v16i16 (X86VBroadcast 1383 (i16 (trunc (i32 (zextloadi16 addr:$src)))))), 1384 (VPBROADCASTWZ256m addr:$src)>; 1385} 1386let Predicates = [HasBWI] in { 1387 // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably. 1388 // This means we'll encounter truncated i32 loads; match that here. 1389 def : Pat<(v32i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))), 1390 (VPBROADCASTWZm addr:$src)>; 1391 def : Pat<(v32i16 (X86VBroadcast 1392 (i16 (trunc (i32 (extloadi16 addr:$src)))))), 1393 (VPBROADCASTWZm addr:$src)>; 1394 def : Pat<(v32i16 (X86VBroadcast 1395 (i16 (trunc (i32 (zextloadi16 addr:$src)))))), 1396 (VPBROADCASTWZm addr:$src)>; 1397} 1398 1399//===----------------------------------------------------------------------===// 1400// AVX-512 BROADCAST SUBVECTORS 1401// 1402 1403defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4", 1404 v16i32_info, v4i32x_info>, 1405 EVEX_V512, EVEX_CD8<32, CD8VT4>; 1406defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4", 1407 v16f32_info, v4f32x_info>, 1408 EVEX_V512, EVEX_CD8<32, CD8VT4>; 1409defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4", 1410 v8i64_info, v4i64x_info>, VEX_W, 1411 EVEX_V512, EVEX_CD8<64, CD8VT4>; 1412defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4", 1413 v8f64_info, v4f64x_info>, VEX_W, 
1414 EVEX_V512, EVEX_CD8<64, CD8VT4>; 1415 1416let Predicates = [HasAVX512] in { 1417def : Pat<(v16f32 (X86SubVBroadcast (loadv8f32 addr:$src))), 1418 (VBROADCASTF64X4rm addr:$src)>; 1419def : Pat<(v16i32 (X86SubVBroadcast (loadv8i32 addr:$src))), 1420 (VBROADCASTI64X4rm addr:$src)>; 1421def : Pat<(v32i16 (X86SubVBroadcast (loadv16i16 addr:$src))), 1422 (VBROADCASTI64X4rm addr:$src)>; 1423def : Pat<(v64i8 (X86SubVBroadcast (loadv32i8 addr:$src))), 1424 (VBROADCASTI64X4rm addr:$src)>; 1425 1426// Provide fallback in case the load node that is used in the patterns above 1427// is used by additional users, which prevents the pattern selection. 1428def : Pat<(v8f64 (X86SubVBroadcast (v4f64 VR256X:$src))), 1429 (VINSERTF64x4Zrr (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 1430 (v4f64 VR256X:$src), 1)>; 1431def : Pat<(v16f32 (X86SubVBroadcast (v8f32 VR256X:$src))), 1432 (VINSERTF64x4Zrr (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 1433 (v8f32 VR256X:$src), 1)>; 1434def : Pat<(v8i64 (X86SubVBroadcast (v4i64 VR256X:$src))), 1435 (VINSERTI64x4Zrr (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 1436 (v4i64 VR256X:$src), 1)>; 1437def : Pat<(v16i32 (X86SubVBroadcast (v8i32 VR256X:$src))), 1438 (VINSERTI64x4Zrr (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 1439 (v8i32 VR256X:$src), 1)>; 1440def : Pat<(v32i16 (X86SubVBroadcast (v16i16 VR256X:$src))), 1441 (VINSERTI64x4Zrr (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 1442 (v16i16 VR256X:$src), 1)>; 1443def : Pat<(v64i8 (X86SubVBroadcast (v32i8 VR256X:$src))), 1444 (VINSERTI64x4Zrr (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 1445 (v32i8 VR256X:$src), 1)>; 1446 1447def : Pat<(v8f64 (X86SubVBroadcast (loadv2f64 addr:$src))), 1448 (VBROADCASTF32X4rm addr:$src)>; 1449def : Pat<(v8i64 (X86SubVBroadcast (loadv2i64 addr:$src))), 1450 (VBROADCASTI32X4rm addr:$src)>; 1451def : Pat<(v32i16 (X86SubVBroadcast (loadv8i16 addr:$src))), 1452 
(VBROADCASTI32X4rm addr:$src)>; 1453def : Pat<(v64i8 (X86SubVBroadcast (loadv16i8 addr:$src))), 1454 (VBROADCASTI32X4rm addr:$src)>; 1455 1456// Patterns for selects of bitcasted operations. 1457def : Pat<(vselect VK16WM:$mask, 1458 (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))), 1459 (v16f32 immAllZerosV)), 1460 (VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>; 1461def : Pat<(vselect VK16WM:$mask, 1462 (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))), 1463 VR512:$src0), 1464 (VBROADCASTF32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>; 1465def : Pat<(vselect VK16WM:$mask, 1466 (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))), 1467 (v16i32 immAllZerosV)), 1468 (VBROADCASTI32X4rmkz VK16WM:$mask, addr:$src)>; 1469def : Pat<(vselect VK16WM:$mask, 1470 (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))), 1471 VR512:$src0), 1472 (VBROADCASTI32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>; 1473 1474def : Pat<(vselect VK8WM:$mask, 1475 (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))), 1476 (v8f64 immAllZerosV)), 1477 (VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>; 1478def : Pat<(vselect VK8WM:$mask, 1479 (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))), 1480 VR512:$src0), 1481 (VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>; 1482def : Pat<(vselect VK8WM:$mask, 1483 (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv8i32 addr:$src)))), 1484 (v8i64 immAllZerosV)), 1485 (VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>; 1486def : Pat<(vselect VK8WM:$mask, 1487 (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv8i32 addr:$src)))), 1488 VR512:$src0), 1489 (VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>; 1490} 1491 1492let Predicates = [HasVLX] in { 1493defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4", 1494 v8i32x_info, v4i32x_info>, 1495 EVEX_V256, EVEX_CD8<32, CD8VT4>; 1496defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4", 1497 v8f32x_info, v4f32x_info>, 
1498 EVEX_V256, EVEX_CD8<32, CD8VT4>; 1499 1500def : Pat<(v4f64 (X86SubVBroadcast (loadv2f64 addr:$src))), 1501 (VBROADCASTF32X4Z256rm addr:$src)>; 1502def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))), 1503 (VBROADCASTI32X4Z256rm addr:$src)>; 1504def : Pat<(v16i16 (X86SubVBroadcast (loadv8i16 addr:$src))), 1505 (VBROADCASTI32X4Z256rm addr:$src)>; 1506def : Pat<(v32i8 (X86SubVBroadcast (loadv16i8 addr:$src))), 1507 (VBROADCASTI32X4Z256rm addr:$src)>; 1508 1509// Patterns for selects of bitcasted operations. 1510def : Pat<(vselect VK8WM:$mask, 1511 (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))), 1512 (v8f32 immAllZerosV)), 1513 (VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>; 1514def : Pat<(vselect VK8WM:$mask, 1515 (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))), 1516 VR256X:$src0), 1517 (VBROADCASTF32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>; 1518def : Pat<(vselect VK8WM:$mask, 1519 (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))), 1520 (v8i32 immAllZerosV)), 1521 (VBROADCASTI32X4Z256rmkz VK8WM:$mask, addr:$src)>; 1522def : Pat<(vselect VK8WM:$mask, 1523 (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))), 1524 VR256X:$src0), 1525 (VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>; 1526 1527 1528// Provide fallback in case the load node that is used in the patterns above 1529// is used by additional users, which prevents the pattern selection. 
// Register-source X86SubVBroadcast to a 256-bit type: the 128-bit source is
// placed at sub_xmm of an IMPLICIT_DEF 256-bit value with INSERT_SUBREG, and
// the same source is then inserted again by VINSERT{F,I}32x4Z256rr with
// immediate 1.
def : Pat<(v4f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
          (VINSERTF32x4Z256rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v2f64 VR128X:$src), 1)>;
def : Pat<(v8f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
          (VINSERTF32x4Z256rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v4f32 VR128X:$src), 1)>;
def : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
          (VINSERTI32x4Z256rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v2i64 VR128X:$src), 1)>;
def : Pat<(v8i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
          (VINSERTI32x4Z256rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v4i32 VR128X:$src), 1)>;
def : Pat<(v16i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
          (VINSERTI32x4Z256rr (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v8i16 VR128X:$src), 1)>;
def : Pat<(v32i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
          (VINSERTI32x4Z256rr (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v16i8 VR128X:$src), 1)>;
}

let Predicates = [HasVLX, HasDQI] in {
// Note: despite the "Z128" suffix in the record names, both records are
// instantiated with 256-bit destination info (v4*64x_info) and EVEX_V256.
defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
                           v4i64x_info, v2i64x_info>, VEX_W1X,
                           EVEX_V256, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
                           v4f64x_info, v2f64x_info>, VEX_W1X,
                           EVEX_V256, EVEX_CD8<64, CD8VT2>;

// Patterns for selects of bitcasted operations.
// The broadcast is typed with 32-bit elements and bitcast to 64-bit elements;
// an all-zeros false operand selects the rmkz record, a register false operand
// ($src0) selects the rmk record.
def : Pat<(vselect VK4WM:$mask,
                   (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
                   (v4f64 immAllZerosV)),
          (VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>;
def : Pat<(vselect VK4WM:$mask,
                   (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
                   VR256X:$src0),
          (VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
def : Pat<(vselect VK4WM:$mask,
                   (bc_v4i64 (v8i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
                   (v4i64 immAllZerosV)),
          (VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>;
def : Pat<(vselect VK4WM:$mask,
                   (bc_v4i64 (v8i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
                   VR256X:$src0),
          (VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
}

let Predicates = [HasDQI] in {
// 512-bit (EVEX_V512) instantiations of avx512_subvec_broadcast_rm_dq: the
// 64x2 forms take a v2*64x_info source, the 32x8 forms a v8*32x_info source.
defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
                       v8i64_info, v2i64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
                       v16i32_info, v8i32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT8>;
defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
                       v8f64_info, v2f64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
                       v16f32_info, v8f32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT8>;

// Patterns for selects of bitcasted operations.
// 16-bit mask: broadcast typed as 64-bit elements, bitcast to 32-bit elements,
// selected to the 32X8 records (rmkz for an all-zeros false operand, rmk for a
// register false operand).
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
                   (v16f32 immAllZerosV)),
          (VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTF32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))),
                   (v16i32 immAllZerosV)),
          (VBROADCASTI32X8rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTI32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;

// 8-bit mask: broadcast typed as 32-bit elements, bitcast to 64-bit elements,
// selected to the 64X2 records.
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
                   (v8f64 immAllZerosV)),
          (VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
                   (v8i64 immAllZerosV)),
          (VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}

// Instantiates avx512_broadcast_rm_split for the 512-bit (Z, HasDQI) and
// 256-bit (Z256, HasDQI + HasVLX) widths. Both widths pass _Src.info128 as the
// last type-info argument and null_frag as the final argument.
multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
                         AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> {
  let Predicates = [HasDQI] in
    defm Z : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256,
                                       WriteShuffle256Ld, _Dst.info512,
                                       _Src.info512, _Src.info128, null_frag>,
                                       EVEX_V512;
  let Predicates = [HasDQI, HasVLX] in
    defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256,
                                          WriteShuffle256Ld, _Dst.info256,
                                          _Src.info256, _Src.info128, null_frag>,
                                          EVEX_V256;
}

// Extends avx512_common_broadcast_32x2 with a 128-bit (Z128) record that uses
// the WriteShuffle / WriteShuffleXLd scheduling classes.
multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
                         AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> :
  avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> {

  let Predicates = [HasDQI, HasVLX] in
    defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle,
                                          WriteShuffleXLd, _Dst.info128,
                                          _Src.info128, _Src.info128, null_frag>,
                                          EVEX_V128;
}

// Only the integer form is instantiated through the _i32x2 multiclass, so only
// VBROADCASTI32X2 gets a Z128 record.
defm VBROADCASTI32X2 : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
                                          avx512vl_i32_info, avx512vl_i64_info>;
defm VBROADCASTF32X2 : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
                                          avx512vl_f32_info, avx512vl_f64_info>;

// X86VBroadcast of a 256-/512-bit register source: the sub_xmm subregister is
// extracted and fed to the register-source scalar broadcast record.
let Predicates = [HasVLX] in {
def : Pat<(v8f32 (X86VBroadcast (v8f32 VR256X:$src))),
          (VBROADCASTSSZ256r (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))>;
def : Pat<(v4f64 (X86VBroadcast (v4f64 VR256X:$src))),
          (VBROADCASTSDZ256r (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))>;
}

def : Pat<(v16f32 (X86VBroadcast (v16f32 VR512:$src))),
          (VBROADCASTSSZr (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))>;
def : Pat<(v16f32 (X86VBroadcast (v8f32 VR256X:$src))),
          (VBROADCASTSSZr (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))>;

def : Pat<(v8f64 (X86VBroadcast (v8f64 VR512:$src))),
          (VBROADCASTSDZr (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))>;
def : Pat<(v8f64 (X86VBroadcast (v4f64 VR256X:$src))),
          (VBROADCASTSDZr (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))>;

//===----------------------------------------------------------------------===//
// AVX-512 BROADCAST MASK TO VECTOR REGISTER
//---
// Single register-register record: the destination vector is the result of
// X86VBroadcastm applied to the mask register operand KRC:$src.
multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr,
                                  X86VectorVTInfo _, RegisterClass KRC> {
  def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.RC:$dst), (ins KRC:$src),
                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                      [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))]>,
                      EVEX, Sched<[WriteShuffle]>;
}

// Width wrapper for avx512_mask_broadcastm: Z requires HasCDI; Z256 and Z128
// additionally require HasVLX.
multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
                                 AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> {
  let Predicates = [HasCDI] in
    defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>, EVEX_V512;
  let Predicates = [HasCDI, HasVLX] in {
    defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>, EVEX_V256;
    defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>, EVEX_V128;
  }
}

defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
                                             avx512vl_i32_info, VK16>;
defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
                                             avx512vl_i64_info, VK8>, VEX_W;

//===----------------------------------------------------------------------===//
// -- VPERMI2 - 3 source operands form --
// Register (rr) and full-vector memory (rm) forms. $src1 is tied to $dst and
// appears in the X86VPermt2 pattern as the IdxVT-typed middle operand; $src2
// and $src3 are the first and last operands.
multiclass avx512_perm_i<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched,
                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
    hasSideEffects = 0 in {
  defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, _.RC:$src3)), 1>,
          EVEX_4V, AVX5128IBase, Sched<[sched]>;

  let mayLoad = 1 in
  defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
            (ins _.RC:$src2, _.MemOp:$src3),
            OpcodeStr, "$src3, $src2", "$src2, $src3",
            (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1,
                   (_.VT (_.LdFrag addr:$src3)))), 1>,
            EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Embedded-broadcast memory form (rmb): $src3 is a scalar memory operand that
// the pattern wraps in X86VBroadcast; the assembly strings append
// _.BroadcastStr to it and the record is marked EVEX_B.
multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched,
                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
      hasSideEffects = 0, mayLoad = 1 in
  defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
              (ins _.RC:$src2, _.ScalarMemOp:$src3),
              OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
              !strconcat("$src2, ${src3}", _.BroadcastStr ),
              (_.VT (X86VPermt2 _.RC:$src2,
               IdxVT.RC:$src1,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))), 1>,
              AVX5128IBase, EVEX_4V, EVEX_B,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// rr/rm/rmb forms at all three widths. The 512-bit records have no predicate
// set here; the 128- and 256-bit records require HasVLX.
multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr,
                               X86FoldableSchedWrite sched,
                               AVX512VLVectorVTInfo VTInfo,
                               AVX512VLVectorVTInfo ShuffleMask> {
  defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
                           ShuffleMask.info512>,
            avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info512,
                             ShuffleMask.info512>, EVEX_V512;
  let Predicates = [HasVLX] in {
  defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
                               ShuffleMask.info128>,
                 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info128,
                                  ShuffleMask.info128>, EVEX_V128;
  defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
                               ShuffleMask.info256>,
                 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info256,
                                  ShuffleMask.info256>, EVEX_V256;
  }
}

// Variant without the rmb form, gated on a caller-supplied predicate Prd
// (plus HasVLX for the 128-/256-bit widths).
multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched,
                                  AVX512VLVectorVTInfo VTInfo,
                                  AVX512VLVectorVTInfo Idx,
                                  Predicate Prd> {
  let Predicates = [Prd] in
  defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
                           Idx.info512>, EVEX_V512;
  let Predicates = [Prd, HasVLX] in {
  defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
                               Idx.info128>, EVEX_V128;
  defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
                               Idx.info256>, EVEX_V256;
  }
}

// The PS/PD forms pair a floating-point data type with an integer index type;
// the integer forms use the same type for both.
defm VPERMI2D : avx512_perm_i_sizes<0x76, "vpermi2d", WriteVarShuffle256,
                  avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMI2Q : avx512_perm_i_sizes<0x76, "vpermi2q", WriteVarShuffle256,
                  avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMI2W : avx512_perm_i_sizes_bw<0x75, "vpermi2w", WriteVarShuffle256,
                  avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
                  VEX_W, EVEX_CD8<16, CD8VF>;
defm VPERMI2B : avx512_perm_i_sizes_bw<0x75, "vpermi2b", WriteVarShuffle256,
                  avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
                  EVEX_CD8<8, CD8VF>;
defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", WriteFVarShuffle256,
                  avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", WriteFVarShuffle256,
                  avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;

// Extra patterns to deal with extra bitcasts due to passthru and index being
// different types on the fp versions.
// Selection patterns for a masked X86VPermt2 whose index operand and vselect
// false operand are both bitcasts of the same CastVT-typed register $src1.
//   InstrStr - instruction record prefix; "rrk", "rmk" or "rmbk" is appended.
//   _        - type info of the data/result vector.
//   IdxVT    - type info the index operand is bitcast to.
//   CastVT   - type info $src1 is viewed as before the bitcasts.
// The three patterns differ only in the last X86VPermt2 operand: a register,
// a full-vector load (_.LdFrag), or a broadcast scalar load.
multiclass avx512_perm_i_lowering<string InstrStr, X86VectorVTInfo _,
                                  X86VectorVTInfo IdxVT,
                                  X86VectorVTInfo CastVT> {
  // Register third operand -> <InstrStr>rrk.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                           (X86VPermt2 (_.VT _.RC:$src2),
                                       (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))), _.RC:$src3),
                           (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
            (!cast<Instruction>(InstrStr#"rrk") _.RC:$src1, _.KRCWM:$mask,
                                                _.RC:$src2, _.RC:$src3)>;
  // Full-vector load as third operand -> <InstrStr>rmk.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                           (X86VPermt2 _.RC:$src2,
                                       (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))),
                                       (_.LdFrag addr:$src3)),
                           (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
            (!cast<Instruction>(InstrStr#"rmk") _.RC:$src1, _.KRCWM:$mask,
                                                _.RC:$src2, addr:$src3)>;
  // Broadcast scalar load as third operand -> <InstrStr>rmbk.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                           (X86VPermt2 _.RC:$src2,
                                       (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))),
                                       (X86VBroadcast (_.ScalarLdFrag addr:$src3))),
                           (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
            (!cast<Instruction>(InstrStr#"rmbk") _.RC:$src1, _.KRCWM:$mask,
                                                 _.RC:$src2, addr:$src3)>;
}

// TODO: Should we add more casts? The vXi64 case is common due to ABI.
// Instantiate the bitcast-lowering patterns for the three VPERMI2PS widths,
// with the passthru/index register viewed as a vXi64 vector.
defm : avx512_perm_i_lowering<"VPERMI2PS", v16f32_info, v16i32_info, v8i64_info>;
defm : avx512_perm_i_lowering<"VPERMI2PS256", v8f32x_info, v8i32x_info, v4i64x_info>;
defm : avx512_perm_i_lowering<"VPERMI2PS128", v4f32x_info, v4i32x_info, v2i64x_info>;

// VPERMT2
// Register (rr) and full-vector memory (rm) forms. $src1 is tied to $dst and is
// the first X86VPermt2 operand; the IdxVT-typed $src2 is the middle operand.
multiclass avx512_perm_t<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched,
                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
  defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins IdxVT.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)), 1>,
          EVEX_4V, AVX5128IBase, Sched<[sched]>;

  defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
            (ins IdxVT.RC:$src2, _.MemOp:$src3),
            OpcodeStr, "$src3, $src2", "$src2, $src3",
            (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
                   (_.LdFrag addr:$src3))), 1>,
            EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
// Embedded-broadcast memory form (rmb) of avx512_perm_t: $src3 is a scalar
// memory operand wrapped in X86VBroadcast; the record is marked EVEX_B.
multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched,
                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
  defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
              (ins IdxVT.RC:$src2, _.ScalarMemOp:$src3),
              OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
              !strconcat("$src2, ${src3}", _.BroadcastStr ),
              (_.VT (X86VPermt2 _.RC:$src1,
               IdxVT.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))), 1>,
              AVX5128IBase, EVEX_4V, EVEX_B,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// rr/rm/rmb forms at all three widths; the 128-/256-bit records require HasVLX.
multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr,
                               X86FoldableSchedWrite sched,
                               AVX512VLVectorVTInfo VTInfo,
                               AVX512VLVectorVTInfo ShuffleMask> {
  defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
                           ShuffleMask.info512>,
            avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info512,
                             ShuffleMask.info512>, EVEX_V512;
  let Predicates = [HasVLX] in {
  defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
                               ShuffleMask.info128>,
                 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info128,
                                  ShuffleMask.info128>, EVEX_V128;
  defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
                               ShuffleMask.info256>,
                 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info256,
                                  ShuffleMask.info256>, EVEX_V256;
  }
}

// Variant without the rmb form, gated on a caller-supplied predicate Prd
// (plus HasVLX for the 128-/256-bit widths).
multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched,
                                  AVX512VLVectorVTInfo VTInfo,
                                  AVX512VLVectorVTInfo Idx, Predicate Prd> {
  let Predicates = [Prd] in
  defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
                           Idx.info512>, EVEX_V512;
  let Predicates = [Prd, HasVLX] in {
  defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
                               Idx.info128>, EVEX_V128;
  defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
                               Idx.info256>, EVEX_V256;
  }
}

defm VPERMT2D : avx512_perm_t_sizes<0x7E, "vpermt2d", WriteVarShuffle256,
                  avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMT2Q : avx512_perm_t_sizes<0x7E, "vpermt2q", WriteVarShuffle256,
                  avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMT2W : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", WriteVarShuffle256,
                  avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
                  VEX_W, EVEX_CD8<16, CD8VF>;
defm VPERMT2B : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", WriteVarShuffle256,
                  avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
                  EVEX_CD8<8, CD8VF>;
defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", WriteFVarShuffle256,
                  avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", WriteFVarShuffle256,
                  avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;

//===----------------------------------------------------------------------===//
// AVX-512 - BLEND using mask
//

// Register and full-vector memory forms of the mask blend, each in unmasked,
// masked (EVEX_K) and zero-masked (EVEX_KZ, "{z}") variants. Every record has
// an empty pattern list, and hasSideEffects / mayLoad are set explicitly.
// The zero-masked records are tagged NotMemoryFoldable.
multiclass WriteFVarBlendask<bits<8> opc, string OpcodeStr,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
             (ins _.RC:$src1, _.RC:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), []>,
             EVEX_4V, Sched<[sched]>;
  def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
             (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
             []>, EVEX_4V, EVEX_K, Sched<[sched]>;
  def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
             (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
             []>, EVEX_4V, EVEX_KZ, Sched<[sched]>, NotMemoryFoldable;
  let mayLoad = 1 in {
  def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
             (ins _.RC:$src1, _.MemOp:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
             []>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
             (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
             []>, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
             (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
             []>, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
  }
  }
}
// Embedded-broadcast memory forms (EVEX_B) of the mask blend: masked (rmbk),
// zero-masked (rmbkz) and unmasked (rmb). $src2 is a scalar memory operand and
// the assembly strings append _.BroadcastStr to it.
multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let mayLoad = 1, hasSideEffects = 0 in {
  def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
      (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
       !strconcat(OpcodeStr,
            "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
            "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
      EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
      Sched<[sched.Folded, sched.ReadAfterFold]>;

  def rmbkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
      (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
       !strconcat(OpcodeStr,
            "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}} {z}|",
            "$dst {${mask}} {z}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
      EVEX_4V, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
      Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;

  def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
      (ins _.RC:$src1, _.ScalarMemOp:$src2),
       !strconcat(OpcodeStr,
            "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
            "$dst, $src1, ${src2}", _.BroadcastStr, "}"), []>,
      EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
      Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// All three widths, including the broadcast forms. The 512-bit records have no
// predicate set here; the 128-/256-bit records require HasVLX.
multiclass blendmask_dq<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
                        AVX512VLVectorVTInfo VTInfo> {
  defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
           WriteFVarBlendask_rmb<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
                                 EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
                WriteFVarBlendask_rmb<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
                                      EVEX_V256;
    defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
                WriteFVarBlendask_rmb<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
                                      EVEX_V128;
  }
}

// All three widths without the broadcast forms, gated on HasBWI
// (plus HasVLX for the 128-/256-bit widths).
multiclass blendmask_bw<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
                        AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasBWI] in
    defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
                               EVEX_V512;

  let Predicates = [HasBWI, HasVLX] in {
    defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
                                  EVEX_V256;
    defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
                                  EVEX_V128;
  }
}

defm VBLENDMPS : blendmask_dq<0x65, "vblendmps", SchedWriteFVarBlend,
                              avx512vl_f32_info>;
defm VBLENDMPD : blendmask_dq<0x65, "vblendmpd", SchedWriteFVarBlend,
                              avx512vl_f64_info>, VEX_W;
defm VPBLENDMD : blendmask_dq<0x64, "vpblendmd", SchedWriteVarBlend,
                              avx512vl_i32_info>;
defm VPBLENDMQ : blendmask_dq<0x64, "vpblendmq", SchedWriteVarBlend,
                              avx512vl_i64_info>, VEX_W;
defm VPBLENDMB : blendmask_bw<0x66, "vpblendmb", SchedWriteVarBlend,
                              avx512vl_i8_info>;
defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", SchedWriteVarBlend,
                              avx512vl_i16_info>, VEX_W;

//===----------------------------------------------------------------------===//
// Compare Instructions
//===----------------------------------------------------------------------===//

// avx512_cmp_scalar - AVX512 CMPSS and CMPSD
//
// Records produced (all opcode 0xC2, result in a mask register _.KRC):
//   rr_Int / rm_Int - vector-register forms built on AVX512_maskable_cmp;
//                     rm_Int takes its memory operand via _.ScalarIntMemCPat.
//   rrb_Int         - register form using OpNodeSAE, printed with "{sae}" and
//                     marked EVEX_B.
//   rr / rm         - isCodeGenOnly forms operating on _.FRC registers;
//                     rr is marked commutable.
// OpNode_su / OpNodeSAE_su are passed as the second pattern argument of
// AVX512_maskable_cmp.

multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
                             PatFrag OpNode_su, PatFrag OpNodeSAE_su,
                             X86FoldableSchedWrite sched> {
  defm rr_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                      (outs _.KRC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                      "vcmp"#_.Suffix,
                      "$cc, $src2, $src1", "$src1, $src2, $cc",
                      (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc),
                      (OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                 imm:$cc)>, EVEX_4V, VEX_LIG, Sched<[sched]>;
  let mayLoad = 1 in
  defm rm_Int : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
                    (outs _.KRC:$dst),
                    (ins _.RC:$src1, _.IntScalarMemOp:$src2, u8imm:$cc),
                    "vcmp"#_.Suffix,
                    "$cc, $src2, $src1", "$src1, $src2, $cc",
                    (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
                        imm:$cc),
                    (OpNode_su (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
                        imm:$cc)>, EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm rrb_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                     (outs _.KRC:$dst),
                     (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                     "vcmp"#_.Suffix,
                     "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc",
                     (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                imm:$cc),
                     (OpNodeSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                   imm:$cc)>,
                     EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;

  let isCodeGenOnly = 1 in {
    let isCommutable = 1 in
    def rr : AVX512Ii8<0xC2, MRMSrcReg,
                (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, u8imm:$cc),
                !strconcat("vcmp", _.Suffix,
                           "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
                [(set _.KRC:$dst, (OpNode _.FRC:$src1,
                                          _.FRC:$src2,
                                          imm:$cc))]>,
                EVEX_4V, VEX_LIG, Sched<[sched]>;
    def rm : AVX512Ii8<0xC2, MRMSrcMem,
              (outs _.KRC:$dst),
              (ins _.FRC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
              !strconcat("vcmp", _.Suffix,
                         "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
              [(set _.KRC:$dst, (OpNode _.FRC:$src1,
                                        (_.ScalarLdFrag addr:$src2),
                                        imm:$cc))]>,
              EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Single-use variants: match X86cmpms / X86cmpmsSAE only when the node has
// exactly one use.
def X86cmpms_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                          (X86cmpms node:$src1, node:$src2, node:$cc), [{
  return N->hasOneUse();
}]>;
def X86cmpmsSAE_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                             (X86cmpmsSAE node:$src1, node:$src2, node:$cc), [{
  return N->hasOneUse();
}]>;

let Predicates = [HasAVX512] in {
  let ExeDomain = SSEPackedSingle in
  defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsSAE,
                                   X86cmpms_su, X86cmpmsSAE_su,
                                   SchedWriteFCmp.Scl>, AVX512XSIi8Base;
  let ExeDomain = SSEPackedDouble in
  defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsSAE,
                                   X86cmpms_su, X86cmpmsSAE_su,
                                   SchedWriteFCmp.Scl>, AVX512XDIi8Base, VEX_W;
}

// Packed integer compare writing a mask register: register (rr) and
// full-vector memory (rm) forms plus their masked variants (rrk, rmk). The
// masked patterns AND the write mask with the OpNode_su compare result.
// IsCommutable is applied to the two register forms only.
multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, PatFrag OpNode,
                              PatFrag OpNode_su, X86FoldableSchedWrite sched,
                              X86VectorVTInfo _, bit IsCommutable> {
  let isCommutable = IsCommutable in
  def rr : AVX512BI<opc, MRMSrcReg,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)))]>,
             EVEX_4V, Sched<[sched]>;
  def rm : AVX512BI<opc, MRMSrcMem,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
                                       (_.VT (_.LdFrag addr:$src2))))]>,
             EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  let isCommutable = IsCommutable in
  def rrk : AVX512BI<opc, MRMSrcReg,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
                          "$dst {${mask}}, $src1, $src2}"),
              [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                   (OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2))))]>,
              EVEX_4V, EVEX_K, Sched<[sched]>;
  def rmk : AVX512BI<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
                          "$dst {${mask}}, $src1, $src2}"),
              [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                   (OpNode_su (_.VT _.RC:$src1),
                                       (_.VT (_.LdFrag addr:$src2)))))]>,
              EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// Adds the embedded-broadcast memory forms (rmb, rmbk; EVEX_B) on top of
// avx512_icmp_packed. The second operand is a broadcast scalar load.
multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, PatFrag OpNode,
                                  PatFrag OpNode_su,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                  bit IsCommutable> :
           avx512_icmp_packed<opc, OpcodeStr, OpNode, OpNode_su, sched, _, IsCommutable> {
  def rmb : AVX512BI<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
              !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
                                    "|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
              [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
                              (X86VBroadcast (_.ScalarLdFrag addr:$src2))))]>,
              EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmbk : AVX512BI<opc, MRMSrcMem,
               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
                                       _.ScalarMemOp:$src2),
               !strconcat(OpcodeStr,
                          "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
                          "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
               [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                      (OpNode_su (_.VT _.RC:$src1),
                                        (X86VBroadcast
                                          (_.ScalarLdFrag addr:$src2)))))]>,
               EVEX_4V, EVEX_K, EVEX_B,
               Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// avx512_icmp_packed at all three widths: Z requires prd; Z256 and Z128
// additionally require HasVLX. IsCommutable defaults to 0.
multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr, PatFrag OpNode,
                                 PatFrag OpNode_su, X86SchedWriteWidths sched,
                                 AVX512VLVectorVTInfo VTInfo, Predicate prd,
                                 bit IsCommutable = 0> {
  let Predicates = [prd] in
  defm Z : avx512_icmp_packed<opc, OpcodeStr, OpNode, OpNode_su, sched.ZMM,
                              VTInfo.info512, IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_packed<opc, OpcodeStr, OpNode, OpNode_su, sched.YMM,
                                   VTInfo.info256, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_icmp_packed<opc, OpcodeStr, OpNode, OpNode_su, sched.XMM,
                                   VTInfo.info128, IsCommutable>, EVEX_V128;
  }
}

// Same width expansion for avx512_icmp_packed_rmb (includes broadcast forms).
multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
                                     PatFrag OpNode, PatFrag OpNode_su,
                                     X86SchedWriteWidths sched,
                                     AVX512VLVectorVTInfo VTInfo,
                                     Predicate prd, bit IsCommutable = 0> {
  let Predicates = [prd] in
  defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, OpNode_su, sched.ZMM,
                                  VTInfo.info512, IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, OpNode_su, sched.YMM,
                                       VTInfo.info256, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, OpNode_su, sched.XMM,
                                       VTInfo.info128, IsCommutable>, EVEX_V128;
  }
}

// This fragment treats X86cmpm as commutable to help match loads in both
// operands for PCMPEQ.
def X86setcc_commute : SDNode<"ISD::SETCC", SDTSetCC, [SDNPCommutative]>;
def X86pcmpeqm_c : PatFrag<(ops node:$src1, node:$src2),
                           (X86setcc_commute node:$src1, node:$src2, SETEQ)>;
def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2),
                         (setcc node:$src1, node:$src2, SETGT)>;

// Single-use variants of the two fragments above (N->hasOneUse()).
def X86pcmpeqm_c_su : PatFrag<(ops node:$src1, node:$src2),
                              (X86pcmpeqm_c node:$src1, node:$src2), [{
  return N->hasOneUse();
}]>;
def X86pcmpgtm_su : PatFrag<(ops node:$src1, node:$src2),
                            (X86pcmpgtm node:$src1, node:$src2), [{
  return N->hasOneUse();
}]>;

// AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
// increase the pattern complexity the way an immediate would.
let AddedComplexity = 2 in {
// FIXME: Is there a better scheduler class for VPCMP?
// Equality compares pass IsCommutable = 1 and use the commutable X86pcmpeqm_c
// fragment; the greater-than compares leave IsCommutable at its default.
// The D/Q forms use the _rmb_vl multiclass and are gated on HasAVX512; the
// B/W forms use the plain _vl multiclass and are gated on HasBWI.
defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb", X86pcmpeqm_c, X86pcmpeqm_c_su,
                      SchedWriteVecALU, avx512vl_i8_info, HasBWI, 1>,
                EVEX_CD8<8, CD8VF>, VEX_WIG;

defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw", X86pcmpeqm_c, X86pcmpeqm_c_su,
                      SchedWriteVecALU, avx512vl_i16_info, HasBWI, 1>,
                EVEX_CD8<16, CD8VF>, VEX_WIG;

defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd", X86pcmpeqm_c, X86pcmpeqm_c_su,
                      SchedWriteVecALU, avx512vl_i32_info, HasAVX512, 1>,
                EVEX_CD8<32, CD8VF>;

defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq", X86pcmpeqm_c, X86pcmpeqm_c_su,
                      SchedWriteVecALU, avx512vl_i64_info, HasAVX512, 1>,
                T8PD, VEX_W, EVEX_CD8<64, CD8VF>;

defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb", X86pcmpgtm, X86pcmpgtm_su,
                      SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
                EVEX_CD8<8, CD8VF>, VEX_WIG;

defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw", X86pcmpgtm, X86pcmpgtm_su,
                      SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
                EVEX_CD8<16, CD8VF>, VEX_WIG;

defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd", X86pcmpgtm, X86pcmpgtm_su,
                      SchedWriteVecALU, avx512vl_i32_info, HasAVX512>,
                EVEX_CD8<32, CD8VF>;

defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", X86pcmpgtm, X86pcmpgtm_su,
                      SchedWriteVecALU, avx512vl_i64_info, HasAVX512>,
                T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
}

// Packed integer compare with an immediate condition code ($cc), writing a
// mask register. Records: rri / rmi and their masked variants rrik / rmik.
//   Frag, Frag_su         - fragments used for the unmasked / masked records.
//   CommFrag, CommFrag_su - fragments used by the two anonymous patterns at
//                           the end, which match a load in the FIRST compare
//                           operand and select the rmi / rmik record with
//                           CommFrag.OperandTransform applied to $cc.
//   Name                  - record-name prefix; Name # _.ZSuffix # "rmi"/"rmik"
//                           is looked up with !cast<Instruction>.
multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
                          PatFrag Frag_su, PatFrag CommFrag, PatFrag CommFrag_su,
                          X86FoldableSchedWrite sched,
                          X86VectorVTInfo _, string Name> {
  let isCommutable = 1 in
  def rri : AVX512AIi8<opc, MRMSrcReg,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
             !strconcat("vpcmp", Suffix,
                        "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
             [(set _.KRC:$dst, (_.KVT (Frag:$cc (_.VT _.RC:$src1),
                                                (_.VT _.RC:$src2),
                                                cond)))]>,
             EVEX_4V, Sched<[sched]>;
  def rmi : AVX512AIi8<opc, MRMSrcMem,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
             !strconcat("vpcmp", Suffix,
                        "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
             [(set _.KRC:$dst, (_.KVT
                                (Frag:$cc
                                 (_.VT _.RC:$src1),
                                 (_.VT (_.LdFrag addr:$src2)),
                                 cond)))]>,
             EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  let isCommutable = 1 in
  def rrik : AVX512AIi8<opc, MRMSrcReg,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
                                      u8imm:$cc),
              !strconcat("vpcmp", Suffix,
                         "\t{$cc, $src2, $src1, $dst {${mask}}|",
                         "$dst {${mask}}, $src1, $src2, $cc}"),
              [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                     (_.KVT (Frag_su:$cc (_.VT _.RC:$src1),
                                                         (_.VT _.RC:$src2),
                                                         cond))))]>,
              EVEX_4V, EVEX_K, Sched<[sched]>;
  def rmik : AVX512AIi8<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
                                    u8imm:$cc),
              !strconcat("vpcmp", Suffix,
                         "\t{$cc, $src2, $src1, $dst {${mask}}|",
                         "$dst {${mask}}, $src1, $src2, $cc}"),
              [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                     (_.KVT
                                      (Frag_su:$cc
                                       (_.VT _.RC:$src1),
                                       (_.VT (_.LdFrag addr:$src2)),
                                       cond))))]>,
              EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;

  // Load in the first operand, unmasked.
  def : Pat<(_.KVT (CommFrag:$cc (_.LdFrag addr:$src2),
                                 (_.VT _.RC:$src1), cond)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmi")
             _.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>;

  // Load in the first operand, masked.
  def : Pat<(and _.KRCWM:$mask,
                 (_.KVT (CommFrag_su:$cc (_.LdFrag addr:$src2),
                                      (_.VT _.RC:$src1), cond))),
            (!cast<Instruction>(Name#_.ZSuffix#"rmik")
             _.KRCWM:$mask, _.RC:$src1, addr:$src2,
             (CommFrag.OperandTransform $cc))>;
}

// Adds the embedded-broadcast memory forms (rmib, rmibk; EVEX_B) on top of
// avx512_icmp_cc, plus the matching patterns for a broadcast scalar load in
// the first compare operand.
multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
                              PatFrag Frag_su, PatFrag CommFrag,
                              PatFrag CommFrag_su, X86FoldableSchedWrite sched,
                              X86VectorVTInfo _, string Name> :
           avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
                          sched, _, Name> {
  def rmib : AVX512AIi8<opc, MRMSrcMem,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
                                     u8imm:$cc),
             !strconcat("vpcmp", Suffix,
                        "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
                        "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
             [(set _.KRC:$dst, (_.KVT (Frag:$cc
                                       (_.VT _.RC:$src1),
                                       (X86VBroadcast
                                        (_.ScalarLdFrag addr:$src2)),
                                       cond)))]>,
             EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmibk : AVX512AIi8<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
                                       _.ScalarMemOp:$src2, u8imm:$cc),
              !strconcat("vpcmp", Suffix,
                  "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
                  "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
              [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                     (_.KVT (Frag_su:$cc
                                             (_.VT _.RC:$src1),
                                             (X86VBroadcast
                                              (_.ScalarLdFrag addr:$src2)),
                                             cond))))]>,
              EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;

  // Broadcast load in the first operand, unmasked.
  def : Pat<(_.KVT (CommFrag:$cc (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
                    (_.VT _.RC:$src1), cond)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmib")
             _.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>;

  // Broadcast load in the first operand, masked.
  def : Pat<(and _.KRCWM:$mask,
                 (_.KVT (CommFrag_su:$cc (X86VBroadcast
                                       (_.ScalarLdFrag addr:$src2)),
                                      (_.VT _.RC:$src1), cond))),
            (!cast<Instruction>(Name#_.ZSuffix#"rmibk")
             _.KRCWM:$mask, _.RC:$src1, addr:$src2,
             (CommFrag.OperandTransform $cc))>;
}

multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, PatFrag Frag,
                             PatFrag Frag_su, PatFrag CommFrag,
                             PatFrag CommFrag_su, X86SchedWriteWidths sched,
                             AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2363 sched.ZMM, VTInfo.info512, NAME>, EVEX_V512; 2364 2365 let Predicates = [prd, HasVLX] in { 2366 defm Z256 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su, 2367 sched.YMM, VTInfo.info256, NAME>, EVEX_V256; 2368 defm Z128 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su, 2369 sched.XMM, VTInfo.info128, NAME>, EVEX_V128; 2370 } 2371} 2372 2373multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, PatFrag Frag, 2374 PatFrag Frag_su, PatFrag CommFrag, 2375 PatFrag CommFrag_su, X86SchedWriteWidths sched, 2376 AVX512VLVectorVTInfo VTInfo, Predicate prd> { 2377 let Predicates = [prd] in 2378 defm Z : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su, 2379 sched.ZMM, VTInfo.info512, NAME>, EVEX_V512; 2380 2381 let Predicates = [prd, HasVLX] in { 2382 defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su, 2383 sched.YMM, VTInfo.info256, NAME>, EVEX_V256; 2384 defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su, 2385 sched.XMM, VTInfo.info128, NAME>, EVEX_V128; 2386 } 2387} 2388 2389def X86pcmpm_imm : SDNodeXForm<setcc, [{ 2390 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); 2391 uint8_t SSECC = X86::getVPCMPImmForCond(CC); 2392 return getI8Imm(SSECC, SDLoc(N)); 2393}]>; 2394 2395// Swapped operand version of the above. 
def X86pcmpm_imm_commute : SDNodeXForm<setcc, [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  uint8_t SSECC = X86::getVPCMPImmForCond(CC);
  SSECC = X86::getSwappedVPCMPImm(SSECC);
  return getI8Imm(SSECC, SDLoc(N));
}]>;

// Matches a setcc whose condition code is NOT an unsigned integer comparison.
// $cc is converted to the instruction immediate with X86pcmpm_imm.
def X86pcmpm : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                       (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return !ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm>;

// Single-use variant of X86pcmpm.
def X86pcmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                          (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return N->hasOneUse() && !ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm>;

// Same as above, but commutes immediate. Use for load folding.
def X86pcmpm_commute : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                               (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return !ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm_commute>;

def X86pcmpm_commute_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                                  (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return N->hasOneUse() && !ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm_commute>;

// Matches a setcc whose condition code IS an unsigned integer comparison.
def X86pcmpum : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                        (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm>;

// Single-use variant of X86pcmpum.
def X86pcmpum_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                           (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return N->hasOneUse() && ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm>;

// Same as above, but commutes immediate. Use for load folding.
def X86pcmpum_commute : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                                (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm_commute>;

def X86pcmpum_commute_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                                   (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return N->hasOneUse() && ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm_commute>;

// FIXME: Is there a better scheduler class for VPCMP/VPCMPU?
// The byte/word forms require HasBWI and have no broadcast variants; the
// dword/qword forms require HasAVX512 and use the _rmb_vl multiclass.
defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86pcmpm, X86pcmpm_su,
                                X86pcmpm_commute, X86pcmpm_commute_su,
                                SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
                                EVEX_CD8<8, CD8VF>;
defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86pcmpum, X86pcmpum_su,
                                 X86pcmpum_commute, X86pcmpum_commute_su,
                                 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
                                 EVEX_CD8<8, CD8VF>;

defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86pcmpm, X86pcmpm_su,
                                X86pcmpm_commute, X86pcmpm_commute_su,
                                SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
                                VEX_W, EVEX_CD8<16, CD8VF>;
defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86pcmpum, X86pcmpum_su,
                                 X86pcmpum_commute, X86pcmpum_commute_su,
                                 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
                                 VEX_W, EVEX_CD8<16, CD8VF>;

defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86pcmpm, X86pcmpm_su,
                                    X86pcmpm_commute, X86pcmpm_commute_su,
                                    SchedWriteVecALU, avx512vl_i32_info,
                                    HasAVX512>, EVEX_CD8<32, CD8VF>;
defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86pcmpum, X86pcmpum_su,
                                     X86pcmpum_commute, X86pcmpum_commute_su,
                                     SchedWriteVecALU, avx512vl_i32_info,
                                     HasAVX512>, EVEX_CD8<32, CD8VF>;

defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86pcmpm, X86pcmpm_su,
                                    X86pcmpm_commute, X86pcmpm_commute_su,
                                    SchedWriteVecALU, avx512vl_i64_info,
                                    HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86pcmpum, X86pcmpum_su,
                                     X86pcmpum_commute, X86pcmpum_commute_su,
                                     SchedWriteVecALU, avx512vl_i64_info,
                                     HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;

// Single-use variants of the packed FP compare nodes.
def X86cmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                         (X86cmpm node:$src1, node:$src2, node:$cc), [{
  return N->hasOneUse();
}]>;
def X86cmpmSAE_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                            (X86cmpmSAE node:$src1, node:$src2, node:$cc), [{
  return N->hasOneUse();
}]>;

// Packed FP compare (opcode 0xC2) producing a mask: register (rri),
// full-vector load (rmi) and broadcast-load (rmbi) forms, each instantiated
// through AVX512_maskable_cmp. The anonymous patterns at the end handle a
// load or broadcast load in the first operand, restricted to condition codes
// accepted by CommutableCMPCC.
// `Name` is the instruction-name prefix used to look up the memory forms.
multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
                              string Name> {
  defm  rri  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                   (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,u8imm:$cc),
                   "vcmp"#_.Suffix,
                   "$cc, $src2, $src1", "$src1, $src2, $cc",
                   (X86cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc),
                   (X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc),
                   1>, Sched<[sched]>;

  defm  rmi  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
                (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
                "vcmp"#_.Suffix,
                "$cc, $src2, $src1", "$src1, $src2, $cc",
                (X86cmpm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
                         imm:$cc),
                (X86cmpm_su (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
                            imm:$cc)>,
                Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm  rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
                (outs _.KRC:$dst),
                (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
                "vcmp"#_.Suffix,
                "$cc, ${src2}"#_.BroadcastStr#", $src1",
                "$src1, ${src2}"#_.BroadcastStr#", $cc",
                (X86cmpm (_.VT _.RC:$src1),
                        (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
                        imm:$cc),
                (X86cmpm_su (_.VT _.RC:$src1),
                            (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
                            imm:$cc)>,
                EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;

  // Patterns for selecting with loads in other operand.
  def : Pat<(X86cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1),
                     CommutableCMPCC:$cc),
            (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
                                                      imm:$cc)>;

  def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.LdFrag addr:$src2),
                                            (_.VT _.RC:$src1),
                                            CommutableCMPCC:$cc)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
                                                       _.RC:$src1, addr:$src2,
                                                       imm:$cc)>;

  def : Pat<(X86cmpm (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
                     (_.VT _.RC:$src1), CommutableCMPCC:$cc),
            (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
                                                       imm:$cc)>;

  def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (X86VBroadcast
                                             (_.ScalarLdFrag addr:$src2)),
                                            (_.VT _.RC:$src1),
                                            CommutableCMPCC:$cc)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
                                                        _.RC:$src1, addr:$src2,
                                                        imm:$cc)>;
}

// Register-register packed FP compare with {sae} in the assembly string,
// matched from X86cmpmSAE and encoded with EVEX_B.
multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  // comparison code form (VCMP[EQ/LT/LE/...])
  defm  rrib  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                     (outs _.KRC:$dst),(ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                     "vcmp"#_.Suffix,
                     "$cc, {sae}, $src2, $src1",
                     "$src1, $src2, {sae}, $cc",
                     (X86cmpmSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc),
                     (X86cmpmSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                    imm:$cc)>,
                     EVEX_B, Sched<[sched]>;
}

// Instantiates the packed FP compares for all three vector widths. Only the
// 512-bit instantiation also gets the {sae} form; the 128/256-bit ones
// additionally require HasVLX.
multiclass avx512_vcmp<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z    : avx512_vcmp_common<sched.ZMM, _.info512, NAME>,
                avx512_vcmp_sae<sched.ZMM, _.info512>, EVEX_V512;

  }
  let Predicates = [HasAVX512,HasVLX] in {
   defm Z128 : avx512_vcmp_common<sched.XMM, _.info128, NAME>, EVEX_V128;
   defm Z256 : avx512_vcmp_common<sched.YMM, _.info256, NAME>, EVEX_V256;
  }
}

defm VCMPPD : avx512_vcmp<SchedWriteFCmp, avx512vl_f64_info>,
                          AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VCMPPS : avx512_vcmp<SchedWriteFCmp, avx512vl_f32_info>,
                          AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;

// Patterns to select fp compares with load as first operand.
let Predicates = [HasAVX512] in {
  def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1,
                            CommutableCMPCC:$cc)),
            (VCMPSDZrm FR64X:$src1, addr:$src2, imm:$cc)>;

  def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1,
                            CommutableCMPCC:$cc)),
            (VCMPSSZrm FR32X:$src1, addr:$src2, imm:$cc)>;
}

// ----------------------------------------------------------------
// FPClass

// Single-use variants of the fpclass nodes, used by the masked forms below.
def X86Vfpclasss_su : PatFrag<(ops node:$src1, node:$src2),
                              (X86Vfpclasss node:$src1, node:$src2), [{
  return N->hasOneUse();
}]>;

def X86Vfpclass_su : PatFrag<(ops node:$src1, node:$src2),
                             (X86Vfpclass node:$src1, node:$src2), [{
  return N->hasOneUse();
}]>;

//handle fpclass instruction  mask =  op(reg_scalar,imm)
//                                    op(mem_scalar,imm)
// rr/rm are the unmasked forms; rrk/rmk AND the result with $mask.
multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                 Predicate prd> {
  let Predicates = [prd], ExeDomain = _.ExeDomain in {
      def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                      (ins _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(set _.KRC:$dst,(X86Vfpclasss (_.VT _.RC:$src1),
                              (i32 imm:$src2)))]>,
                      Sched<[sched]>;
      def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                      (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix#
                      "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                      [(set _.KRC:$dst,(and _.KRCWM:$mask,
                                      (X86Vfpclasss_su (_.VT _.RC:$src1),
                                      (i32 imm:$src2))))]>,
                      EVEX_K, Sched<[sched]>;
    def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.IntScalarMemOp:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix##
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.KRC:$dst,
                          (X86Vfpclasss _.ScalarIntMemCPat:$src1,
                                       (i32 imm:$src2)))]>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
    def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix##
                    "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                    [(set _.KRC:$dst,(and _.KRCWM:$mask,
                        (X86Vfpclasss_su _.ScalarIntMemCPat:$src1,
                            (i32 imm:$src2))))]>,
                    EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

//handle fpclass instruction mask = fpclass(reg_vec, reg_vec, imm)
//                                  fpclass(reg_vec, mem_vec, imm)
//                                  fpclass(reg_vec, broadcast(eltVt), imm)
// `mem` is the one-letter size suffix ("x", "y" or "z") appended in braces to
// the full-vector memory forms' mnemonic. The InstAliases at the end accept
// the same suffix on the register and broadcast forms.
multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                 string mem>{
  let ExeDomain = _.ExeDomain in {
  def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                      (ins _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(set _.KRC:$dst,(X86Vfpclass (_.VT _.RC:$src1),
                                       (i32 imm:$src2)))]>,
                      Sched<[sched]>;
  def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                      (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix#
                      "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                      [(set _.KRC:$dst,(and _.KRCWM:$mask,
                                       (X86Vfpclass_su (_.VT _.RC:$src1),
                                       (i32 imm:$src2))))]>,
                      EVEX_K, Sched<[sched]>;
  def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.MemOp:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix#"{"#mem#"}"#
                    "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.KRC:$dst,(X86Vfpclass
                                     (_.VT (_.LdFrag addr:$src1)),
                                     (i32 imm:$src2)))]>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix#"{"#mem#"}"#
                    "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                    [(set _.KRC:$dst, (and _.KRCWM:$mask, (X86Vfpclass_su
                                  (_.VT (_.LdFrag addr:$src1)),
                                  (i32 imm:$src2))))]>,
                    EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix##"\t{$src2, ${src1}"##
                                      _.BroadcastStr##", $dst|$dst, ${src1}"
                                                  ##_.BroadcastStr##", $src2}",
                    [(set _.KRC:$dst,(X86Vfpclass
                                     (_.VT (X86VBroadcast
                                           (_.ScalarLdFrag addr:$src1))),
                                     (i32 imm:$src2)))]>,
                    EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix##"\t{$src2, ${src1}"##
                          _.BroadcastStr##", $dst {${mask}}|$dst {${mask}}, ${src1}"##
                                                   _.BroadcastStr##", $src2}",
                    [(set _.KRC:$dst,(and _.KRCWM:$mask, (X86Vfpclass_su
                                     (_.VT (X86VBroadcast
                                           (_.ScalarLdFrag addr:$src1))),
                                     (i32 imm:$src2))))]>,
                    EVEX_B, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }

  // Allow registers or broadcast with the x, y, z suffix we use to disambiguate
  // the memory form.
  def : InstAlias<OpcodeStr#_.Suffix#mem#
                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  (!cast<Instruction>(NAME#"rr")
                   _.KRC:$dst, _.RC:$src1, i32u8imm:$src2), 0, "att">;
  def : InstAlias<OpcodeStr#_.Suffix#mem#
                  "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                  (!cast<Instruction>(NAME#"rrk")
                   _.KRC:$dst, _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2), 0, "att">;
  def : InstAlias<OpcodeStr#_.Suffix#mem#
                  "\t{$src2, ${src1}"#_.BroadcastStr#", $dst|$dst, ${src1}"#
                  _.BroadcastStr#", $src2}",
                  (!cast<Instruction>(NAME#"rmb")
                   _.KRC:$dst, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
  def : InstAlias<OpcodeStr#_.Suffix#mem#
                  "\t{$src2, ${src1}"#_.BroadcastStr#", $dst {${mask}}|"
                  "$dst {${mask}}, ${src1}"#_.BroadcastStr#", $src2}",
                  (!cast<Instruction>(NAME#"rmbk")
                   _.KRC:$dst, _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
}

// Instantiates the vector fpclass forms: 512-bit ("z") under `prd`, and
// 128-bit ("x") / 256-bit ("y") under `prd` plus HasVLX.
multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _,
                                     bits<8> opc, X86SchedWriteWidths sched,
                                     Predicate prd>{
  let Predicates = [prd] in {
    defm Z    : avx512_vector_fpclass<opc, OpcodeStr, sched.ZMM,
                                      _.info512, "z">, EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, sched.XMM,
                                      _.info128, "x">, EVEX_V128;
    defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, sched.YMM,
                                      _.info256, "y">, EVEX_V256;
  }
}

// Packed (PS/PD, opcode opcVec) and scalar (SSZ/SDZ, opcode opcScalar)
// fpclass instructions for f32 and f64.
multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
                                 bits<8> opcScalar, X86SchedWriteWidths sched,
                                 Predicate prd> {
  defm PS : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f32_info, opcVec,
                                      sched, prd>,
                                      EVEX_CD8<32, CD8VF>;
  defm PD : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f64_info, opcVec,
                                      sched, prd>,
                                      EVEX_CD8<64, CD8VF> , VEX_W;
  defm SSZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
                                   sched.Scl, f32x_info, prd>, VEX_LIG,
                                   EVEX_CD8<32, CD8VT1>;
  defm SDZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
                                   sched.Scl, f64x_info, prd>, VEX_LIG,
                                   EVEX_CD8<64, CD8VT1>, VEX_W;
}

defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, SchedWriteFCmp,
                                      HasDQI>, AVX512AIi8Base, EVEX;

//-----------------------------------------------------------------
// Mask register copy, including
// - copy between mask registers
// - load/store mask registers
// - copy from GPR to mask register and vice versa
//
// kk: mask-to-mask move (no selection pattern), km: load a mask from memory,
// mk: store a mask to memory.
multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
                         string OpcodeStr, RegisterClass KRC,
                         ValueType vvt, X86MemOperand x86memop> {
  let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove] in
  def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
             Sched<[WriteMove]>;
  def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
             [(set KRC:$dst, (vvt (load addr:$src)))]>,
             Sched<[WriteLoad]>;
  def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
             [(store KRC:$src, addr:$dst)]>,
             Sched<[WriteStore]>;
}

// kr: GPR-to-mask move, rk: mask-to-GPR move. Neither has a selection
// pattern; they are referenced explicitly from the patterns further down.
multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
                             string OpcodeStr,
                             RegisterClass KRC, RegisterClass GRC> {
  let hasSideEffects = 0 in {
    def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
               Sched<[WriteMove]>;
    def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
               Sched<[WriteMove]>;
  }
}

let Predicates = [HasDQI] in
  defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
               avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
               VEX, PD;

let Predicates = [HasAVX512] in
  defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
               avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
               VEX, PS;

let Predicates = [HasBWI] in {
  defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem>,
               VEX, PD, VEX_W;
  defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
               VEX, XD;
  defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>,
               VEX, PS, VEX_W;
  defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
               VEX, XD, VEX_W;
}

// GR from/to mask register
// 16-bit and 8-bit GPR values go through a 32-bit register (INSERT_SUBREG /
// EXTRACT_SUBREG) on their way to or from the mask register class.
def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
          (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>;
def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>;

def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
          (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>;
def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit)>;

// zext selects an explicit KMOV to a GPR; anyext only needs a register-class
// copy.
def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
          (KMOVWrk VK16:$src)>;
def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
          (COPY_TO_REGCLASS VK16:$src, GR32)>;

def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
          (KMOVBrk VK8:$src)>, Requires<[HasDQI]>;
def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
          (COPY_TO_REGCLASS VK8:$src, GR32)>;

def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
          (COPY_TO_REGCLASS GR32:$src, VK32)>;
def : Pat<(i32 (bitconvert (v32i1 VK32:$src))),
          (COPY_TO_REGCLASS VK32:$src, GR32)>;
def : Pat<(v64i1 (bitconvert (i64 GR64:$src))),
          (COPY_TO_REGCLASS GR64:$src, VK64)>;
def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
          (COPY_TO_REGCLASS VK64:$src, GR64)>;

// Load/store kreg
// With DQI, masks narrower than 8 bits are loaded/stored with KMOVB through
// VK8.
let Predicates = [HasDQI] in {
  def : Pat<(store VK1:$src, addr:$dst),
            (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK8))>;

  def : Pat<(v1i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>;
  def : Pat<(v2i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>;
  def : Pat<(v4i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>;
}

let Predicates = [HasAVX512] in {
  def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
            (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
  def : Pat<(v16i1 (bitconvert (loadi16 addr:$src))),
            (KMOVWkm addr:$src)>;
}

// EXTRACT_VECTOR_ELT from an i1-element vector, producing an i8 result.
def X86kextract : SDNode<"ISD::EXTRACT_VECTOR_ELT",
                         SDTypeProfile<1, 2, [SDTCisVT<0, i8>,
                                              SDTCVecEltisVT<1, i1>,
                                              SDTCisPtrTy<2>]>>;

let Predicates = [HasAVX512] in {
  // Register-class copies between a GPR and a mask register for
  // scalar_to_vector and for extraction of element 0.
  multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC, ValueType maskVT> {
    def : Pat<(maskVT (scalar_to_vector GR32:$src)),
              (COPY_TO_REGCLASS GR32:$src, maskRC)>;

    def : Pat<(maskVT (scalar_to_vector GR8:$src)),
              (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>;

    def : Pat<(i8 (X86kextract maskRC:$src, (iPTR 0))),
              (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS maskRC:$src, GR32)), sub_8bit)>;

    def : Pat<(i32 (anyext (i8 (X86kextract maskRC:$src, (iPTR 0))))),
              (i32 (COPY_TO_REGCLASS maskRC:$src, GR32))>;
  }

  defm : operation_gpr_mask_copy_lowering<VK1,  v1i1>;
  defm : operation_gpr_mask_copy_lowering<VK2,  v2i1>;
  defm : operation_gpr_mask_copy_lowering<VK4,  v4i1>;
  defm : operation_gpr_mask_copy_lowering<VK8,  v8i1>;
  defm : operation_gpr_mask_copy_lowering<VK16,  v16i1>;
  defm : operation_gpr_mask_copy_lowering<VK32,  v32i1>;
  defm : operation_gpr_mask_copy_lowering<VK64,  v64i1>;

  // Inserting a single bit into an all-zero v16i1: AND the GPR with 1 and
  // move it to a mask register with KMOVW.
  def : Pat<(insert_subvector (v16i1 immAllZerosV),
                              (v1i1 (scalar_to_vector GR8:$src)), (iPTR 0)),
            (COPY_TO_REGCLASS
             (KMOVWkr (AND32ri8
                       (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
                       (i32 1))), VK16)>;
}

// Mask unary operation
// - KNOT
multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
                            RegisterClass KRC, SDPatternOperator OpNode,
                            X86FoldableSchedWrite sched, Predicate prd> {
  let Predicates = [prd] in
    def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
               [(set KRC:$dst, (OpNode KRC:$src))]>,
               Sched<[sched]>;
}

// Instantiates the unary mask op for all four mask widths:
// B (VK8, HasDQI), W (VK16, HasAVX512), D and Q (VK32/VK64, HasBWI).
multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
                                SDPatternOperator OpNode,
                                X86FoldableSchedWrite sched> {
  defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
                            sched, HasDQI>, VEX, PD;
  defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
                            sched, HasAVX512>, VEX, PS;
  defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
                            sched, HasBWI>, VEX, PD, VEX_W;
  defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
                            sched, HasBWI>, VEX, PS, VEX_W;
}

// TODO - do we need a X86SchedWriteWidths::KMASK type?
defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SchedWriteVecLogic.XMM>;

// Without DQI there is no byte-sized KNOT, so an 8-bit mask is widened to
// VK16, inverted with KNOTW, and copied back.
let Predicates = [HasAVX512, NoDQI] in
def : Pat<(vnot VK8:$src),
          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;

// Masks narrower than 8 bits always go through the 16-bit instruction.
def : Pat<(vnot VK4:$src),
          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>;
def : Pat<(vnot VK2:$src),
          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>;

// Mask binary operation
// - KAND, KANDN, KOR, KXNOR, KXOR
multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
                             RegisterClass KRC, SDPatternOperator OpNode,
                             X86FoldableSchedWrite sched, Predicate prd,
                             bit IsCommutable> {
  let Predicates = [prd], isCommutable = IsCommutable in
    def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
               !strconcat(OpcodeStr,
                          "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
               [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>,
               Sched<[sched]>;
}

// Instantiates the binary mask op for all four mask widths:
// B (VK8, HasDQI), W (VK16, prdW), D and Q (VK32/VK64, HasBWI).
// prdW lets a caller tighten the predicate of the W form (KADD uses HasDQI).
multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
                                 SDPatternOperator OpNode,
                                 X86FoldableSchedWrite sched, bit IsCommutable,
                                 Predicate prdW = HasAVX512> {
  defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
                             sched, HasDQI, IsCommutable>, VEX_4V, VEX_L, PD;
  defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
                             sched, prdW, IsCommutable>, VEX_4V, VEX_L, PS;
  defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
                             sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PD;
  defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
                             sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PS;
}

def andn : PatFrag<(ops node:$i0, node:$i1), (and (not node:$i0), node:$i1)>;
def xnor : PatFrag<(ops node:$i0, node:$i1), (not (xor node:$i0, node:$i1))>;
// These nodes use 'vnot' instead of 'not' to support vectors.
def vandn : PatFrag<(ops node:$i0, node:$i1), (and (vnot node:$i0), node:$i1)>;
def vxnor : PatFrag<(ops node:$i0, node:$i1), (vnot (xor node:$i0, node:$i1))>;

// TODO - do we need a X86SchedWriteWidths::KMASK type?
defm KAND  : avx512_mask_binop_all<0x41, "kand",  and,     SchedWriteVecLogic.XMM, 1>;
defm KOR   : avx512_mask_binop_all<0x45, "kor",   or,      SchedWriteVecLogic.XMM, 1>;
defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor,   SchedWriteVecLogic.XMM, 1>;
defm KXOR  : avx512_mask_binop_all<0x47, "kxor",  xor,     SchedWriteVecLogic.XMM, 1>;
defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn,   SchedWriteVecLogic.XMM, 0>;
defm KADD  : avx512_mask_binop_all<0x4A, "kadd",  X86kadd, SchedWriteVecLogic.XMM, 1, HasDQI>;

// Patterns that lower binary ops on narrow masks through the 16-bit mask
// instruction `Inst`: both operands are copied to VK16, `Inst` is applied, and
// the result is copied back to the register class of the operands.
//   VOpNode - operator used for the vector mask types (VK2/VK4/VK8).
//   OpNode  - operator used for the single-bit VK1 case.
multiclass avx512_binop_pat<SDPatternOperator VOpNode, SDPatternOperator OpNode,
                            Instruction Inst> {
  // With AVX512F, 8-bit mask is promoted to 16-bit mask,
  // for the DQI set, this type is legal and KxxxB instruction is used
  let Predicates = [NoDQI] in
  def : Pat<(VOpNode VK8:$src1, VK8:$src2),
            (COPY_TO_REGCLASS
              (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
                    (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;

  // All types smaller than 8 bits require conversion anyway
  def : Pat<(OpNode VK1:$src1, VK1:$src2),
        (COPY_TO_REGCLASS (Inst
                           (COPY_TO_REGCLASS VK1:$src1, VK16),
                           (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
  // The result is copied back to the class matching the operand type
  // (VK2 / VK4), not VK1.
  def : Pat<(VOpNode VK2:$src1, VK2:$src2),
        (COPY_TO_REGCLASS (Inst
                           (COPY_TO_REGCLASS VK2:$src1, VK16),
                           (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>;
  def : Pat<(VOpNode VK4:$src1, VK4:$src2),
        (COPY_TO_REGCLASS (Inst
                           (COPY_TO_REGCLASS VK4:$src1, VK16),
                           (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>;
}

defm : avx512_binop_pat<and,   and,  KANDWrr>;
defm : avx512_binop_pat<vandn, andn, KANDNWrr>;
defm : avx512_binop_pat<or,    or,   KORWrr>;
defm : avx512_binop_pat<vxnor, xnor, KXNORWrr>;
defm : avx512_binop_pat<xor,   xor,  KXORWrr>;

// Mask unpacking
// The instruction itself has no pattern; concat_vectors is selected through
// the explicit Pat below, which passes the two sources in swapped order.
multiclass avx512_mask_unpck<string Suffix, X86KVectorVTInfo Dst,
                             X86KVectorVTInfo Src, X86FoldableSchedWrite sched,
                             Predicate prd> {
  let Predicates = [prd] in {
    let hasSideEffects = 0 in
    def rr : I<0x4b, MRMSrcReg, (outs Dst.KRC:$dst),
               (ins Src.KRC:$src1, Src.KRC:$src2),
               "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
               VEX_4V, VEX_L, Sched<[sched]>;

    def : Pat<(Dst.KVT (concat_vectors Src.KRC:$src1, Src.KRC:$src2)),
              (!cast<Instruction>(NAME##rr) Src.KRC:$src2, Src.KRC:$src1)>;
  }
}

defm KUNPCKBW : avx512_mask_unpck<"bw", v16i1_info, v8i1_info,  WriteShuffle, HasAVX512>, PD;
defm KUNPCKWD : avx512_mask_unpck<"wd", v32i1_info, v16i1_info, WriteShuffle, HasBWI>, PS;
defm KUNPCKDQ : avx512_mask_unpck<"dq", v64i1_info, v32i1_info, WriteShuffle, HasBWI>, PS, VEX_W;

// Mask bit testing
// The instruction has no register output; it only defines EFLAGS.
multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                              SDNode OpNode, X86FoldableSchedWrite sched,
                              Predicate prd> {
  let Predicates = [prd], Defs = [EFLAGS] in
    def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
               !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
               [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>,
               Sched<[sched]>;
}

// Instantiates the mask test op for all four mask widths:
// B (VK8, HasDQI), W (VK16, prdW), Q and D (VK64/VK32, HasBWI).
multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86FoldableSchedWrite sched,
                                Predicate prdW = HasAVX512> {
  defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, sched, HasDQI>,
                                                                VEX, PD;
  defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, sched, prdW>,
                                                                VEX, PS;
  defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, sched, HasBWI>,
                                                                VEX, PS, VEX_W;
  defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, sched, HasBWI>,
                                                                VEX, PD, VEX_W;
}

// TODO - do we need a X86SchedWriteWidths::KMASK type?
defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SchedWriteVecLogic.XMM>;
defm KTEST   : avx512_mask_testop_w<0x99, "ktest", X86ktest, SchedWriteVecLogic.XMM, HasDQI>;

// Mask shift
// Defines `ri`: shift of a KRC mask by an 8-bit immediate, selected from
// (OpNode $src, imm).
multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                               SDNode OpNode, X86FoldableSchedWrite sched> {
  let Predicates = [HasAVX512] in
  def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
               OpcodeStr # "\t{$imm, $src, $dst|$dst, $src, $imm}",
               [(set KRC:$dst, (OpNode KRC:$src, (i8 imm:$imm)))]>,
               Sched<[sched]>;
}

// Instantiates the W/B/Q/D forms of a mask shift. opc1 encodes the W and B
// forms, opc2 the Q and D forms; within each pair VEX_W marks the wider one.
// NOTE(review): avx512_mask_shiftop itself sets Predicates = [HasAVX512] on
// `ri`; confirm the outer HasDQI/HasBWI `let`s below are what take effect.
multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
                                 SDNode OpNode, X86FoldableSchedWrite sched> {
  defm W : avx512_mask_shiftop<opc1, OpcodeStr#"w", VK16, OpNode, sched>,
           VEX, TAPD, VEX_W;
  let Predicates = [HasDQI] in
  defm B : avx512_mask_shiftop<opc1, OpcodeStr#"b", VK8, OpNode, sched>,
           VEX, TAPD;
  let Predicates = [HasBWI] in {
    defm Q : avx512_mask_shiftop<opc2, OpcodeStr#"q", VK64, OpNode, sched>,
             VEX, TAPD, VEX_W;
    defm D : avx512_mask_shiftop<opc2, OpcodeStr#"d", VK32, OpNode, sched>,
             VEX, TAPD;
  }
}

defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, WriteShuffle>;
defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, WriteShuffle>;

// Patterns for comparing 128/256-bit integer vectors using 512-bit instruction.
multiclass axv512_icmp_packed_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
                                              string InstStr,
                                              X86VectorVTInfo Narrow,
                                              X86VectorVTInfo Wide> {
  // Unmasked compare: each Narrow source is inserted into an undefined Wide
  // register at Narrow.SubRegIdx, the <InstStr>Zrr instruction is applied, and
  // the resulting mask is copied to Narrow.KRC.
  def : Pat<(Narrow.KVT (Frag (Narrow.VT Narrow.RC:$src1),
                              (Narrow.VT Narrow.RC:$src2))),
            (COPY_TO_REGCLASS
              (!cast<Instruction>(InstStr#"Zrr")
                (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
                (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx))),
              Narrow.KRC)>;

  // Compare and-ed with a mask: folded into the write-masked <InstStr>Zrrk
  // form, with $mask first copied to Wide.KRC.
  def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                             (Frag_su (Narrow.VT Narrow.RC:$src1),
                                      (Narrow.VT Narrow.RC:$src2)))),
            (COPY_TO_REGCLASS
              (!cast<Instruction>(InstStr#"Zrrk")
                (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
                (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
                (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx))),
              Narrow.KRC)>;
}

// Patterns for comparing 128/256-bit integer vectors using 512-bit instruction.
multiclass axv512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
                                                 string InstStr,
                                                 X86VectorVTInfo Narrow,
                                                 X86VectorVTInfo Wide> {
  // Unmasked compare with a condition code: each Narrow source is inserted
  // into an undefined Wide register at Narrow.SubRegIdx, <InstStr>Zrri is
  // applied with the immediate produced by Frag.OperandTransform from $cc, and
  // the resulting mask is copied to Narrow.KRC.
  def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
                                  (Narrow.VT Narrow.RC:$src2), cond)),
            (COPY_TO_REGCLASS
              (!cast<Instruction>(InstStr#"Zrri")
                (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
                (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
                (Frag.OperandTransform $cc)), Narrow.KRC)>;

  // Compare and-ed with a mask: folded into the write-masked <InstStr>Zrrik
  // form, with $mask first copied to Wide.KRC.
  def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                             (Narrow.KVT (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
                                                      (Narrow.VT Narrow.RC:$src2),
                                                      cond)))),
            (COPY_TO_REGCLASS
              (!cast<Instruction>(InstStr#"Zrrik")
                (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
                (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
                (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
                (Frag.OperandTransform $cc)), Narrow.KRC)>;
}

// Same as above, but for fp types which don't use PatFrags.
multiclass axv512_cmp_packed_cc_no_vlx_lowering<SDNode OpNode, PatFrag OpNode_su,
                                                string InstStr,
                                                X86VectorVTInfo Narrow,
                                                X86VectorVTInfo Wide> {
  // Unmasked compare with an immediate condition code: each Narrow source is
  // inserted into an undefined Wide register at Narrow.SubRegIdx, <InstStr>Zrri
  // is applied with $cc passed through, and the mask is copied to Narrow.KRC.
  def : Pat<(Narrow.KVT (OpNode (Narrow.VT Narrow.RC:$src1),
                                (Narrow.VT Narrow.RC:$src2), imm:$cc)),
            (COPY_TO_REGCLASS
              (!cast<Instruction>(InstStr#"Zrri")
                (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
                (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
                imm:$cc), Narrow.KRC)>;

  // Compare and-ed with a mask: folded into the write-masked <InstStr>Zrrik
  // form, with $mask first copied to Wide.KRC.
  def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                             (OpNode_su (Narrow.VT Narrow.RC:$src1),
                                        (Narrow.VT Narrow.RC:$src2), imm:$cc))),
            (COPY_TO_REGCLASS
              (!cast<Instruction>(InstStr#"Zrrik")
                (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
                (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
                (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
                imm:$cc), Narrow.KRC)>;
}

// 32/64-bit element compares on 128/256-bit vectors when VLX is unavailable.
let Predicates = [HasAVX512, NoVLX] in {
  // AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
  // increase the pattern complexity the way an immediate would.
  let AddedComplexity = 2 in {
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTD", v8i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQD", v8i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTD", v4i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQD", v4i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTQ", v4i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQQ", v4i64x_info, v8i64_info>;

  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTQ", v2i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQQ", v2i64x_info, v8i64_info>;
  }

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;

  defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, X86cmpm_su, "VCMPPS", v8f32x_info, v16f32_info>;
  defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, X86cmpm_su, "VCMPPS", v4f32x_info, v16f32_info>;
  defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, X86cmpm_su, "VCMPPD", v4f64x_info, v8f64_info>;
  defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, X86cmpm_su, "VCMPPD", v2f64x_info, v8f64_info>;
}

// 8/16-bit element compares on 128/256-bit vectors when VLX is unavailable.
let Predicates = [HasBWI, NoVLX] in {
  // AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
  // increase the pattern complexity the way an immediate would.
  let AddedComplexity = 2 in {
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTB", v32i8x_info, v64i8_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQB", v32i8x_info, v64i8_info>;

  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTB", v16i8x_info, v64i8_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQB", v16i8x_info, v64i8_info>;

  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTW", v16i16x_info, v32i16_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQW", v16i16x_info, v32i16_info>;

  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTW", v8i16x_info, v32i16_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQW", v8i16x_info, v32i16_info>;
  }

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v32i8x_info, v64i8_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v32i8x_info, v64i8_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v16i8x_info, v64i8_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v16i8x_info, v64i8_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v16i16x_info, v32i16_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v16i16x_info, v32i16_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v8i16x_info, v32i16_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v8i16x_info, v32i16_info>;
}

// Mask setting all 0s or 1s
// Defines a pseudo instruction with no inputs that produces the constant mask
// Val (of type VT) in KRC. It is marked rematerializable and as cheap as a move.
multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> {
  let Predicates = [HasAVX512] in
    let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1,
        SchedRW = [WriteZero] in
      def #NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
                     [(set KRC:$dst, (VT Val))]>;
}

// Instantiates the W/D/Q (VK16/VK32/VK64) forms of a constant-mask pseudo.
multiclass avx512_mask_setop_w<PatFrag Val> {
  defm W : avx512_mask_setop<VK16, v16i1, Val>;
  defm D : avx512_mask_setop<VK32, v32i1, Val>;
  defm Q : avx512_mask_setop<VK64, v64i1, Val>;
}

defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
defm KSET1 : avx512_mask_setop_w<immAllOnesV>;

// With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
// Constant masks narrower than 16 bits reuse the 16-bit KSET0W/KSET1W pseudos
// and copy the result into the narrower mask class.
let Predicates = [HasAVX512] in {
  def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
  def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>;
  def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>;
  def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>;
  def : Pat<(v8i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK8)>;
  def : Pat<(v4i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK4)>;
  def : Pat<(v2i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK2)>;
  def : Pat<(v1i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK1)>;
}

// Patterns for kmask insert_subvector/extract_subvector to/from index=0
// Both directions are lowered to a plain COPY_TO_REGCLASS between the
// sub-mask class (subRC/subVT) and the full mask class (RC/VT).
multiclass operation_subvector_mask_lowering<RegisterClass subRC, ValueType subVT,
                                             RegisterClass RC, ValueType VT> {
  def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
            (subVT (COPY_TO_REGCLASS RC:$src, subRC))>;

  def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
            (VT (COPY_TO_REGCLASS subRC:$src, RC))>;
}
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK2,  v2i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK4,  v4i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK8,  v8i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK4,  v4i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK8,  v8i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK8,  v8i1>;
defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK16, v16i1, VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>;

//===----------------------------------------------------------------------===//
// AVX-512 - Aligned and unaligned load and store
//

// Defines the load-side forms of a vector move for one vector type `_`:
//   rr / rrk / rrkz - register move: plain, merge-masked, zero-masked.
//   rm / rmk / rmkz - load:          plain, merge-masked, zero-masked.
// plus patterns selecting the masked-load fragment `mload` to rmk/rmkz.
//   Name         - base record name used to look up the rmk/rmkz instructions.
//   ld_frag      - load fragment matched by rm/rmk/rmkz.
//   EVEX2VEXOvrd - prefix passed to EVEX2VEXOverride for rr and rm.
//   NoRMPattern  - when set, `rm` gets no selection pattern.
//   SelectOprr   - select operator matched by the rrk/rrkz register forms.
multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name,
                       X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload,
                       X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
                       bit NoRMPattern = 0,
                       SDPatternOperator SelectOprr = vselect> {
  let hasSideEffects = 0 in {
  let isMoveReg = 1 in
  def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
                    _.ExeDomain>, EVEX, Sched<[Sched.RR]>,
                    EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
  def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
                      (ins _.KRCWM:$mask, _.RC:$src),
                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
                                 "${dst} {${mask}} {z}, $src}"),
                      [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
                                              (_.VT _.RC:$src),
                                              _.ImmAllZerosV)))], _.ExeDomain>,
                      EVEX, EVEX_KZ, Sched<[Sched.RR]>;

  let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1 in
  def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    !if(NoRMPattern, [],
                        [(set _.RC:$dst,
                          (_.VT (ld_frag addr:$src)))]),
                    _.ExeDomain>, EVEX, Sched<[Sched.RM]>,
                    EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;

  // Merge-masking forms: $src0 supplies the pass-through value and is tied
  // to $dst.
  let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in {
    def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
                       (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1),
                       !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
                                  "${dst} {${mask}}, $src1}"),
                       [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
                                               (_.VT _.RC:$src1),
                                               (_.VT _.RC:$src0))))], _.ExeDomain>,
                       EVEX, EVEX_K, Sched<[Sched.RR]>;
    def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
                       (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1),
                       !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
                                  "${dst} {${mask}}, $src1}"),
                       [(set _.RC:$dst, (_.VT
                           (vselect _.KRCWM:$mask,
                            (_.VT (ld_frag addr:$src1)),
                            (_.VT _.RC:$src0))))], _.ExeDomain>,
                       EVEX, EVEX_K, Sched<[Sched.RM]>;
  }
  def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
                      (ins _.KRCWM:$mask, _.MemOp:$src),
                      OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"#
                                 "${dst} {${mask}} {z}, $src}",
                      [(set _.RC:$dst, (_.VT (vselect _.KRCWM:$mask,
                        (_.VT (ld_frag addr:$src)), _.ImmAllZerosV)))],
                      _.ExeDomain>, EVEX, EVEX_KZ, Sched<[Sched.RM]>;
  }

  // Masked loads: an undef or all-zeros pass-through selects the zero-masking
  // rmkz form; a register pass-through selects the merge-masking rmk form.
  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmkz") _.KRCWM:$mask, addr:$ptr)>;

  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmkz") _.KRCWM:$mask, addr:$ptr)>;

  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))),
            (!cast<Instruction>(Name#_.ZSuffix#"rmk") _.RC:$src0,
             _.KRCWM:$mask, addr:$ptr)>;
}

// Instantiates avx512_load with the aligned load fragments for the 512-bit
// form (predicate prd) and the 256/128-bit forms (prd plus HasVLX). The 512-bit
// form passes an empty EVEX2VEX override; the 256-bit form appends "Y".
multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
                                 AVX512VLVectorVTInfo _, Predicate prd,
                                 X86SchedWriteMoveLSWidths Sched,
                                 string EVEX2VEXOvrd, bit NoRMPattern = 0> {
  let Predicates = [prd] in
  defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512,
                       _.info512.AlignedLdFrag, masked_load_aligned,
                       Sched.ZMM, "", NoRMPattern>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
  defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256,
                          _.info256.AlignedLdFrag, masked_load_aligned,
                          Sched.YMM, EVEX2VEXOvrd#"Y", NoRMPattern>, EVEX_V256;
  defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128,
                          _.info128.AlignedLdFrag, masked_load_aligned,
                          Sched.XMM, EVEX2VEXOvrd, NoRMPattern>, EVEX_V128;
  }
}

// Same as avx512_alignedload_vl but using the unaligned load fragments, and
// forwarding SelectOprr to avx512_load.
multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
                          AVX512VLVectorVTInfo _, Predicate prd,
                          X86SchedWriteMoveLSWidths Sched,
                          string EVEX2VEXOvrd, bit NoRMPattern = 0,
                          SDPatternOperator SelectOprr = vselect> {
  let Predicates = [prd] in
  defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512, _.info512.LdFrag,
                       masked_load, Sched.ZMM, "",
                       NoRMPattern, SelectOprr>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
  defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256, _.info256.LdFrag,
                          masked_load, Sched.YMM, EVEX2VEXOvrd#"Y",
                          NoRMPattern, SelectOprr>, EVEX_V256;
  defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128, _.info128.LdFrag,
                          masked_load, Sched.XMM, EVEX2VEXOvrd,
                          NoRMPattern, SelectOprr>, EVEX_V128;
  }
}

// Defines the store-side forms of a vector move for one vector type `_`:
//   rr_REV / rrk_REV / rrkz_REV - register moves using the store opcode's
//                                 MRMDestReg encoding; codegen-only, reachable
//                                 from assembly through the ".s" aliases below.
//   mr / mrk                    - store: plain and masked.
// plus the pattern selecting the masked-store fragment `mstore` to mrk.
//   BaseName    - base record name used to look up sibling instructions.
//   st_frag     - store fragment matched by mr.
//   NoMRPattern - when set, `mr` gets no selection pattern.
multiclass avx512_store<bits<8> opc, string OpcodeStr, string BaseName,
                        X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore,
                        X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
                        bit NoMRPattern = 0> {
  let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
  let isMoveReg = 1 in
  def rr_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
                        OpcodeStr # "\t{$src, $dst|$dst, $src}",
                        [], _.ExeDomain>, EVEX,
                        FoldGenData<BaseName#_.ZSuffix#"rr">, Sched<[Sched.RR]>,
                        EVEX2VEXOverride<EVEX2VEXOvrd#"rr_REV">;
  def rrk_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
                         (ins _.KRCWM:$mask, _.RC:$src),
                         OpcodeStr # "\t{$src, ${dst} {${mask}}|"#
                         "${dst} {${mask}}, $src}",
                         [], _.ExeDomain>, EVEX, EVEX_K,
                         FoldGenData<BaseName#_.ZSuffix#"rrk">,
                         Sched<[Sched.RR]>;
  def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
                          (ins _.KRCWM:$mask, _.RC:$src),
                          OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" #
                          "${dst} {${mask}} {z}, $src}",
                          [], _.ExeDomain>, EVEX, EVEX_KZ,
                          FoldGenData<BaseName#_.ZSuffix#"rrkz">,
                          Sched<[Sched.RR]>;
  }

  // This unbraced `let` applies to `mr` only.
  let hasSideEffects = 0, mayStore = 1 in
  def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    !if(NoMRPattern, [],
                        [(st_frag (_.VT _.RC:$src), addr:$dst)]),
                    _.ExeDomain>, EVEX, Sched<[Sched.MR]>,
                    EVEX2VEXOverride<EVEX2VEXOvrd#"mr">;
  def mrk : AVX512PI<opc, MRMDestMem, (outs),
                     (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
                     OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
                     [], _.ExeDomain>, EVEX, EVEX_K, Sched<[Sched.MR]>,
                     NotMemoryFoldable;

  def: Pat<(mstore (_.VT _.RC:$src), addr:$ptr, _.KRCWM:$mask),
           (!cast<Instruction>(BaseName#_.ZSuffix#"mrk") addr:$ptr,
                                                         _.KRCWM:$mask, _.RC:$src)>;

  // "<mnemonic>.s" assembler aliases for the _REV register forms.
  def : InstAlias<OpcodeStr#".s\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(BaseName#_.ZSuffix#"rr_REV")
                   _.RC:$dst, _.RC:$src), 0>;
  def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
                  (!cast<Instruction>(BaseName#_.ZSuffix#"rrk_REV")
                   _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
  def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}",
                  (!cast<Instruction>(BaseName#_.ZSuffix#"rrkz_REV")
                   _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
}

// Instantiates avx512_store with the unaligned store fragments for the 512-bit
// form (predicate prd) and the 256/128-bit forms (prd plus HasVLX).
multiclass avx512_store_vl< bits<8> opc, string OpcodeStr,
                            AVX512VLVectorVTInfo _, Predicate prd,
                            X86SchedWriteMoveLSWidths Sched,
                            string EVEX2VEXOvrd, bit NoMRPattern = 0> {
  let Predicates = [prd] in
  defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, store,
                        masked_store, Sched.ZMM, "",
                        NoMRPattern>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, store,
                             masked_store, Sched.YMM,
                             EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
    defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, store,
                             masked_store, Sched.XMM, EVEX2VEXOvrd,
                             NoMRPattern>, EVEX_V128;
  }
}

// Same as avx512_store_vl but using the aligned store fragments.
multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _, Predicate prd,
                                  X86SchedWriteMoveLSWidths Sched,
                                  string EVEX2VEXOvrd, bit NoMRPattern = 0> {
  let Predicates = [prd] in
  defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, alignedstore,
                        masked_store_aligned, Sched.ZMM, "",
                        NoMRPattern>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, alignedstore,
                             masked_store_aligned, Sched.YMM,
                             EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
    defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, alignedstore,
                             masked_store_aligned, Sched.XMM, EVEX2VEXOvrd,
                             NoMRPattern>, EVEX_V128;
  }
}

defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info,
                                     HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
               avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info,
                                      HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
               PS, EVEX_CD8<32, CD8VF>;

defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info,
                                     HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
               avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info,
                                      HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
               PD, VEX_W, EVEX_CD8<64, CD8VF>;

defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512,
                              SchedWriteFMoveLS, "VMOVUPS", 0, null_frag>,
               avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512,
                               SchedWriteFMoveLS, "VMOVUPS">,
               PS, EVEX_CD8<32, CD8VF>;

// Unaligned fp64 move. Passing null_frag as the select operator leaves the
// register rrk/rrkz forms without a matchable select pattern.
defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512,
                              SchedWriteFMoveLS, "VMOVUPD", 0, null_frag>,
               avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512,
                               SchedWriteFMoveLS, "VMOVUPD">,
               PD, VEX_W, EVEX_CD8<64, CD8VF>;

// Integer moves. A trailing 1 is NoRMPattern/NoMRPattern: the unmasked rm/mr
// forms get no pattern of their own.
defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info,
                                       HasAVX512, SchedWriteVecMoveLS,
                                       "VMOVDQA", 1>,
                 avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info,
                                        HasAVX512, SchedWriteVecMoveLS,
                                        "VMOVDQA", 1>,
                 PD, EVEX_CD8<32, CD8VF>;

defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info,
                                       HasAVX512, SchedWriteVecMoveLS,
                                       "VMOVDQA">,
                 avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info,
                                        HasAVX512, SchedWriteVecMoveLS,
                                        "VMOVDQA">,
                 PD, VEX_W, EVEX_CD8<64, CD8VF>;

defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI,
                               SchedWriteVecMoveLS, "VMOVDQU", 1>,
                avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, HasBWI,
                                SchedWriteVecMoveLS, "VMOVDQU", 1>,
                XD, EVEX_CD8<8, CD8VF>;

defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI,
                                SchedWriteVecMoveLS, "VMOVDQU", 1>,
                 avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, HasBWI,
                                 SchedWriteVecMoveLS, "VMOVDQU", 1>,
                 XD, VEX_W, EVEX_CD8<16, CD8VF>;

defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
                                SchedWriteVecMoveLS, "VMOVDQU", 1, null_frag>,
                 avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
                                 SchedWriteVecMoveLS, "VMOVDQU", 1>,
                 XS, EVEX_CD8<32, CD8VF>;

defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
                                SchedWriteVecMoveLS, "VMOVDQU", 0, null_frag>,
                 avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
                                 SchedWriteVecMoveLS, "VMOVDQU">,
                 XS, VEX_W, EVEX_CD8<64, CD8VF>;

// Special instructions to help with spilling when we don't have VLX. We need
// to load or store from a ZMM register instead. These are converted in
// expandPostRAPseudos.
let isReMaterializable = 1, canFoldAsLoad = 1,
    isPseudo = 1, mayLoad = 1, hasSideEffects = 0 in {
  def VMOVAPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
                              "", []>, Sched<[WriteFLoadX]>;
  def VMOVAPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
                              "", []>, Sched<[WriteFLoadY]>;
  def VMOVUPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
                              "", []>, Sched<[WriteFLoadX]>;
  def VMOVUPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
                              "", []>, Sched<[WriteFLoadY]>;
}

let isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
  def VMOVAPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
                              "", []>, Sched<[WriteFStoreX]>;
  def VMOVAPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
                              "", []>, Sched<[WriteFStoreY]>;
  def VMOVUPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
                              "", []>, Sched<[WriteFStoreX]>;
  def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
                              "", []>, Sched<[WriteFStoreY]>;
}

// A vselect whose *true* operand is all zeros is lowered as a zero-masked move
// of $src under the inverted mask. For v8i64 the mask is widened to VK16 so
// that KNOTWrr can invert it, then copied back to VK8.
def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 immAllZerosV),
                          (v8i64 VR512:$src))),
          (VMOVDQA64Zrrkz
            (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)), VK8),
            VR512:$src)>;

def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
                           (v16i32 VR512:$src))),
          (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;

// These patterns exist to prevent the above patterns from introducing a second
// mask inversion when one already exists.
// When the select condition is already (xor $mask, all-ones), the original
// $mask is used directly as the zero-masking predicate.
def : Pat<(v8i64 (vselect (xor VK8:$mask, (v8i1 immAllOnesV)),
                          (v8i64 immAllZerosV),
                          (v8i64 VR512:$src))),
          (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>;
def : Pat<(v16i32 (vselect (xor VK16:$mask, (v16i1 immAllOnesV)),
                           (v16i32 immAllZerosV),
                           (v16i32 VR512:$src))),
          (VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;

// Lowers a vselect on a Narrow vector type through the Wide masked-move
// instruction named by InstrStr: the Narrow operands are inserted into
// undefined Wide registers at Narrow.SubRegIdx, the mask is copied to
// Wide.KRCWM, and the Narrow result is extracted from the Wide result.
multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow,
                              X86VectorVTInfo Wide> {
  // General select: merge-masking "rrk" form, with $src0 as the pass-through.
  def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
                                Narrow.RC:$src1, Narrow.RC:$src0)),
            (EXTRACT_SUBREG
              (Wide.VT
                (!cast<Instruction>(InstrStr#"rrk")
                  (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src0, Narrow.SubRegIdx)),
                  (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
                  (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
              Narrow.SubRegIdx)>;

  // Select against all zeros: zero-masking "rrkz" form.
  def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
                                Narrow.RC:$src1, Narrow.ImmAllZerosV)),
            (EXTRACT_SUBREG
              (Wide.VT
                (!cast<Instruction>(InstrStr#"rrkz")
                  (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
                  (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
              Narrow.SubRegIdx)>;
}

// Patterns for handling v8i1 selects of 256-bit vectors when VLX isn't
// available. Use a 512-bit operation and extract.
3638let Predicates = [HasAVX512, NoVLX] in { 3639 defm : mask_move_lowering<"VMOVAPSZ", v4f32x_info, v16f32_info>; 3640 defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>; 3641 defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>; 3642 defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>; 3643 3644 defm : mask_move_lowering<"VMOVAPDZ", v2f64x_info, v8f64_info>; 3645 defm : mask_move_lowering<"VMOVDQA64Z", v2i64x_info, v8i64_info>; 3646 defm : mask_move_lowering<"VMOVAPDZ", v4f64x_info, v8f64_info>; 3647 defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>; 3648} 3649 3650let Predicates = [HasBWI, NoVLX] in { 3651 defm : mask_move_lowering<"VMOVDQU8Z", v16i8x_info, v64i8_info>; 3652 defm : mask_move_lowering<"VMOVDQU8Z", v32i8x_info, v64i8_info>; 3653 3654 defm : mask_move_lowering<"VMOVDQU16Z", v8i16x_info, v32i16_info>; 3655 defm : mask_move_lowering<"VMOVDQU16Z", v16i16x_info, v32i16_info>; 3656} 3657 3658let Predicates = [HasAVX512] in { 3659 // 512-bit load. 3660 def : Pat<(alignedloadv16i32 addr:$src), 3661 (VMOVDQA64Zrm addr:$src)>; 3662 def : Pat<(alignedloadv32i16 addr:$src), 3663 (VMOVDQA64Zrm addr:$src)>; 3664 def : Pat<(alignedloadv64i8 addr:$src), 3665 (VMOVDQA64Zrm addr:$src)>; 3666 def : Pat<(loadv16i32 addr:$src), 3667 (VMOVDQU64Zrm addr:$src)>; 3668 def : Pat<(loadv32i16 addr:$src), 3669 (VMOVDQU64Zrm addr:$src)>; 3670 def : Pat<(loadv64i8 addr:$src), 3671 (VMOVDQU64Zrm addr:$src)>; 3672 3673 // 512-bit store. 
3674 def : Pat<(alignedstore (v16i32 VR512:$src), addr:$dst), 3675 (VMOVDQA64Zmr addr:$dst, VR512:$src)>; 3676 def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst), 3677 (VMOVDQA64Zmr addr:$dst, VR512:$src)>; 3678 def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst), 3679 (VMOVDQA64Zmr addr:$dst, VR512:$src)>; 3680 def : Pat<(store (v16i32 VR512:$src), addr:$dst), 3681 (VMOVDQU64Zmr addr:$dst, VR512:$src)>; 3682 def : Pat<(store (v32i16 VR512:$src), addr:$dst), 3683 (VMOVDQU64Zmr addr:$dst, VR512:$src)>; 3684 def : Pat<(store (v64i8 VR512:$src), addr:$dst), 3685 (VMOVDQU64Zmr addr:$dst, VR512:$src)>; 3686} 3687 3688let Predicates = [HasVLX] in { 3689 // 128-bit load. 3690 def : Pat<(alignedloadv4i32 addr:$src), 3691 (VMOVDQA64Z128rm addr:$src)>; 3692 def : Pat<(alignedloadv8i16 addr:$src), 3693 (VMOVDQA64Z128rm addr:$src)>; 3694 def : Pat<(alignedloadv16i8 addr:$src), 3695 (VMOVDQA64Z128rm addr:$src)>; 3696 def : Pat<(loadv4i32 addr:$src), 3697 (VMOVDQU64Z128rm addr:$src)>; 3698 def : Pat<(loadv8i16 addr:$src), 3699 (VMOVDQU64Z128rm addr:$src)>; 3700 def : Pat<(loadv16i8 addr:$src), 3701 (VMOVDQU64Z128rm addr:$src)>; 3702 3703 // 128-bit store. 3704 def : Pat<(alignedstore (v4i32 VR128X:$src), addr:$dst), 3705 (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>; 3706 def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst), 3707 (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>; 3708 def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst), 3709 (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>; 3710 def : Pat<(store (v4i32 VR128X:$src), addr:$dst), 3711 (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>; 3712 def : Pat<(store (v8i16 VR128X:$src), addr:$dst), 3713 (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>; 3714 def : Pat<(store (v16i8 VR128X:$src), addr:$dst), 3715 (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>; 3716 3717 // 256-bit load. 
  // Aligned loads select VMOVDQA64Z256rm; plain loads select VMOVDQU64Z256rm.
  def : Pat<(alignedloadv8i32 addr:$src),
            (VMOVDQA64Z256rm addr:$src)>;
  def : Pat<(alignedloadv16i16 addr:$src),
            (VMOVDQA64Z256rm addr:$src)>;
  def : Pat<(alignedloadv32i8 addr:$src),
            (VMOVDQA64Z256rm addr:$src)>;
  def : Pat<(loadv8i32 addr:$src),
            (VMOVDQU64Z256rm addr:$src)>;
  def : Pat<(loadv16i16 addr:$src),
            (VMOVDQU64Z256rm addr:$src)>;
  def : Pat<(loadv32i8 addr:$src),
            (VMOVDQU64Z256rm addr:$src)>;

  // 256-bit store.
  def : Pat<(alignedstore (v8i32 VR256X:$src), addr:$dst),
            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst),
            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst),
            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v8i32 VR256X:$src), addr:$dst),
            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v16i16 VR256X:$src), addr:$dst),
            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v32i8 VR256X:$src), addr:$dst),
            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
}

// Move Int Doubleword to Packed Double Int
//
// Opcode 0x6E forms: GR32/GR64 or i32mem/i64mem source into VR128X (or FR64X
// for the codegen-only bitconvert form). Opcode 0x7E is used for the reverse
// FR64X -> GR64 bitconvert.
let ExeDomain = SSEPackedInt in {
def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(set VR128X:$dst,
                        (v4i32 (scalar_to_vector GR32:$src)))]>,
                        EVEX, Sched<[WriteVecMoveFromGpr]>;
def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(set VR128X:$dst,
                        (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
                      EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>;
def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(set VR128X:$dst,
                        (v2i64 (scalar_to_vector GR64:$src)))]>,
                      EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
// No selection pattern ([]), so mayLoad is stated explicitly.
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
                      (ins i64mem:$src),
                      "vmovq\t{$src, $dst|$dst, $src}", []>,
                      EVEX, VEX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecLoad]>;
// Scalar-register (FR64X) variants selected for GR64 <-> f64 bitconvert.
let isCodeGenOnly = 1 in {
def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src),
                       "vmovq\t{$src, $dst|$dst, $src}",
                       [(set FR64X:$dst, (bitconvert GR64:$src))]>,
                       EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src),
                         "vmovq\t{$src, $dst|$dst, $src}",
                         [(set GR64:$dst, (bitconvert FR64X:$src))]>,
                         EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
}
} // ExeDomain = SSEPackedInt

// Move Int Doubleword to Single Scalar
//
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
def VMOVDI2SSZrr  : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(set FR32X:$dst, (bitconvert GR32:$src))]>,
                      EVEX, Sched<[WriteVecMoveFromGpr]>;
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1

// Move doubleword from xmm register to r/m32
//
// Both forms match extraction of element 0 of a v4i32.
let ExeDomain = SSEPackedInt in {
def VMOVPDI2DIZrr  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
                       "vmovd\t{$src, $dst|$dst, $src}",
                       [(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
                                        (iPTR 0)))]>,
                       EVEX, Sched<[WriteVecMoveToGpr]>;
def VMOVPDI2DIZmr  : AVX512BI<0x7E, MRMDestMem, (outs),
                       (ins i32mem:$dst, VR128X:$src),
                       "vmovd\t{$src, $dst|$dst, $src}",
                       [(store (i32 (extractelt (v4i32 VR128X:$src),
                                     (iPTR 0))), addr:$dst)]>,
                       EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>;
} // ExeDomain = SSEPackedInt

// Move quadword from xmm1 register to r/m64
//
let ExeDomain = SSEPackedInt in {
def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
                                                   (iPTR 0)))]>,
                      PD, EVEX, VEX_W, Sched<[WriteVecMoveToGpr]>,
                      Requires<[HasAVX512]>;

// Pattern-less 0x7E store form; additionally requires In64BitMode.
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src),
                      "vmovq\t{$src, $dst|$dst, $src}", []>, PD,
                      EVEX, VEX_W, Sched<[WriteVecStore]>,
                      Requires<[HasAVX512, In64BitMode]>;

// 0xD6 store form; this is the one carrying the element-0 store pattern.
def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
                      (ins i64mem:$dst, VR128X:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
                              addr:$dst)]>,
                      EVEX, PD, VEX_W, EVEX_CD8<64, CD8VT1>,
                      Sched<[WriteVecStore]>, Requires<[HasAVX512]>;

// Register-to-register 0xD6 form with no pattern; reachable from assembly
// through the "vmovq.s" alias defined right after this let block.
let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
                             (ins VR128X:$src),
                             "vmovq\t{$src, $dst|$dst, $src}", []>,
                             EVEX, VEX_W, Sched<[SchedWriteVecLogic.XMM]>;
} // ExeDomain = SSEPackedInt

def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}",
                (VMOVPQI2QIZrr VR128X:$dst, VR128X:$src), 0>;

let Predicates = [HasAVX512] in {
  def : Pat<(X86vextractstore64 (v2i64 VR128X:$src), addr:$dst),
            (VMOVPQI2QIZmr addr:$dst, VR128X:$src)>;
}

// Move Scalar Single to Double Int
//
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
def VMOVSS2DIZrr  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
                      (ins FR32X:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(set GR32:$dst, (bitconvert FR32X:$src))]>,
                      EVEX, Sched<[WriteVecMoveToGpr]>;
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1

// Move Quadword Int to Packed Quadword Int
//
// NOTE(review): the other 64-bit memory forms in this section use
// EVEX_CD8<64, CD8VT1>; this one uses EVEX_CD8<8, CD8VT8>. Confirm the two
// spellings are meant to be equivalent for compressed-displacement scaling.
let ExeDomain = SSEPackedInt in {
def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
                      (ins i64mem:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(set VR128X:$dst,
                        (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
                      EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>;
} // ExeDomain = SSEPackedInt

// Allow "vmovd" but print "vmovq".
def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
                (VMOV64toPQIZrr VR128X:$dst, GR64:$src), 0>;
def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
                (VMOVPQIto64Zrr GR64:$dst, VR128X:$src), 0>;

//===----------------------------------------------------------------------===//
// AVX-512 MOVSS, MOVSD
//===----------------------------------------------------------------------===//

// Defines the scalar move family for one element type.
//   asm         - mnemonic, prepended to each operand string via !strconcat.
//   OpNode      - SDNode matched by the register forms (rr, rrk, rrkz).
//   vzload_frag - PatFrag matched by the unmasked vector load form (rm).
//   _           - X86VectorVTInfo supplying RC, FRC, KRCWM, ScalarMemOp, VT
//                 and ExeDomain.
// Records produced: rr, rrkz, rrk (opcode 0x10, register source); rm, rm_alt,
// rmk, rmkz (opcode 0x10, memory source); mr, mrk (opcode 0x11, stores).
multiclass avx512_move_scalar<string asm, SDNode OpNode, PatFrag vzload_frag,
                              X86VectorVTInfo _> {
  // This brace-less let covers only the rr definition that follows it.
  let Predicates = [HasAVX512, OptForSize] in
  def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
             (ins _.RC:$src1, _.RC:$src2),
             !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2)))],
             _.ExeDomain>, EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
  // Zero-masked form: X86selects between the moved value and all-zeros.
  def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
              (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
              !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|",
              "$dst {${mask}} {z}, $src1, $src2}"),
              [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
                                      (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                                      _.ImmAllZerosV)))],
              _.ExeDomain>, EVEX_4V, EVEX_KZ, Sched<[SchedWriteFShuffle.XMM]>;
  // Merge-masked form: $src0 is tied to $dst and is the select's false value.
  let Constraints = "$src0 = $dst"  in
  def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
             (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
             !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|",
             "$dst {${mask}}, $src1, $src2}"),
             [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
                                     (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                                     (_.VT _.RC:$src0))))],
             _.ExeDomain>, EVEX_4V, EVEX_K, Sched<[SchedWriteFShuffle.XMM]>;
  let canFoldAsLoad = 1, isReMaterializable = 1 in {
  def rm : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst), (ins _.ScalarMemOp:$src),
             !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
             [(set _.RC:$dst, (_.VT (vzload_frag addr:$src)))],
             _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
  // _alt version uses FR32/FR64 register class.
  let isCodeGenOnly = 1 in
  def rm_alt : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
                 !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
                 [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
                 _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
  }
  // Masked loads carry no pattern here, so mayLoad is stated explicitly.
  let mayLoad = 1, hasSideEffects = 0 in {
    let Constraints = "$src0 = $dst" in
    def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
               (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src),
               !strconcat(asm, "\t{$src, $dst {${mask}}|",
               "$dst {${mask}}, $src}"),
               [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFLoad]>;
    def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
               (ins _.KRCWM:$mask, _.ScalarMemOp:$src),
               !strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
               "$dst {${mask}} {z}, $src}"),
               [], _.ExeDomain>, EVEX, EVEX_KZ, Sched<[WriteFLoad]>;
  }
  def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src),
             !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
             [(store _.FRC:$src, addr:$dst)], _.ExeDomain>,
             EVEX, Sched<[WriteFStore]>;
  // Masked store has no pattern here, so mayStore is stated explicitly.
  let mayStore = 1, hasSideEffects = 0 in
  def mrk: AVX512PI<0x11, MRMDestMem, (outs),
              (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.RC:$src),
              !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
              [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFStore]>,
              NotMemoryFoldable;
}

defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, X86vzload32, f32x_info>,
                                  VEX_LIG, XS, EVEX_CD8<32, CD8VT1>;

defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, X86vzload64, f64x_info>,
                                  VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>;


// Selects the masked register forms when the scalar select was performed on
// the element before it was moved into the vector:
//   OpNode(src0, scalar_to_vector(X86selects(mask, src1, src2)))   -> rrk
//   OpNode(src0, scalar_to_vector(X86selects(mask, src1, ZeroFP))) -> rrkz
// InstrStr is the instruction name prefix; the rrk/rrkz suffix is pasted on.
multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
                                       PatLeaf ZeroFP, X86VectorVTInfo _> {

// Merge form: $src2 (the select's false value) is passed as the first, tied
// operand of rrk.
def : Pat<(_.VT (OpNode _.RC:$src0,
                        (_.VT (scalar_to_vector
                                  (_.EltVT (X86selects VK1WM:$mask,
                                                       (_.EltVT _.FRC:$src1),
                                                       (_.EltVT _.FRC:$src2))))))),
          (!cast<Instruction>(InstrStr#rrk)
                        (_.VT (COPY_TO_REGCLASS _.FRC:$src2, _.RC)),
                        VK1WM:$mask,
                        (_.VT _.RC:$src0),
                        (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;

// Zeroing form: the select's false value is the ZeroFP leaf.
def : Pat<(_.VT (OpNode _.RC:$src0,
                        (_.VT (scalar_to_vector
                                  (_.EltVT (X86selects VK1WM:$mask,
                                                       (_.EltVT _.FRC:$src1),
                                                       (_.EltVT ZeroFP))))))),
          (!cast<Instruction>(InstrStr#rrkz)
                        VK1WM:$mask,
                        (_.VT _.RC:$src0),
                        (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
}

// Matches a 512-bit masked_store whose value is a 128-bit vector inserted at
// index 0 of undef, and selects the scalar masked store (InstrStr#mrk).
// Mask is the mask dag to match; MaskRC is the register class of the $mask
// operand inside it, which is copied to VK1WM.
multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
                                        dag Mask, RegisterClass MaskRC> {

def : Pat<(masked_store
             (_.info512.VT (insert_subvector undef,
                               (_.info128.VT _.info128.RC:$src),
                               (iPTR 0))), addr:$dst, Mask),
          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
                      _.info128.RC:$src)>;

}

// Same as avx512_store_scalar_lowering, but $mask is first placed into an i32
// via INSERT_SUBREG at index `subreg` of an IMPLICIT_DEF before the copy to
// VK1WM.
multiclass avx512_store_scalar_lowering_subreg<string InstrStr,
                                               AVX512VLVectorVTInfo _,
                                               dag Mask, RegisterClass MaskRC,
                                               SubRegIndex subreg> {

def : Pat<(masked_store
             (_.info512.VT (insert_subvector undef,
                               (_.info128.VT _.info128.RC:$src),
                               (iPTR 0))), addr:$dst, Mask),
          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      _.info128.RC:$src)>;

}

// This matches the more recent codegen from clang that avoids emitting a 512
// bit masked store directly. Codegen will widen 128-bit masked store to 512
// bits on AVX512F only targets.
// Takes two mask dags: Mask512 for the widened (512-bit) masked_store and
// Mask128 for the native 128-bit masked_store. Both select InstrStr#mrk with
// $mask widened to i32 through INSERT_SUBREG and copied to VK1WM.
multiclass avx512_store_scalar_lowering_subreg2<string InstrStr,
                                                AVX512VLVectorVTInfo _,
                                                dag Mask512, dag Mask128,
                                                RegisterClass MaskRC,
                                                SubRegIndex subreg> {

// AVX512F pattern.
def : Pat<(masked_store
             (_.info512.VT (insert_subvector undef,
                               (_.info128.VT _.info128.RC:$src),
                               (iPTR 0))), addr:$dst, Mask512),
          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      _.info128.RC:$src)>;

// AVX512VL pattern.
def : Pat<(masked_store (_.info128.VT _.info128.RC:$src), addr:$dst, Mask128),
          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      _.info128.RC:$src)>;
}

// Matches extraction of the low 128 bits of a 512-bit masked_load and selects
// the scalar masked load: an all-zeros passthrough selects InstrStr#rmkz; a
// passthrough built from X86vzmovl of $src selects InstrStr#rmk with $src as
// the merge operand.
multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
                                       dag Mask, RegisterClass MaskRC> {

def : Pat<(_.info128.VT (extract_subvector
                         (_.info512.VT (masked_load addr:$srcAddr, Mask,
                                        _.info512.ImmAllZerosV)),
                           (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmkz)
                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
                      addr:$srcAddr)>;

def : Pat<(_.info128.VT (extract_subvector
                (_.info512.VT (masked_load addr:$srcAddr, Mask,
                      (_.info512.VT (insert_subvector undef,
                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
                            (iPTR 0))))),
                (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
                      addr:$srcAddr)>;

}

// Same as avx512_load_scalar_lowering, but $mask is first placed into an i32
// via INSERT_SUBREG at index `subreg` of an IMPLICIT_DEF before the copy to
// VK1WM.
multiclass avx512_load_scalar_lowering_subreg<string InstrStr,
                                              AVX512VLVectorVTInfo _,
                                              dag Mask, RegisterClass MaskRC,
                                              SubRegIndex subreg> {

def : Pat<(_.info128.VT (extract_subvector
                         (_.info512.VT (masked_load addr:$srcAddr, Mask,
                                        _.info512.ImmAllZerosV)),
                           (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmkz)
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

def : Pat<(_.info128.VT (extract_subvector
                (_.info512.VT (masked_load addr:$srcAddr, Mask,
                      (_.info512.VT (insert_subvector undef,
                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
                            (iPTR 0))))),
                (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

}

// This matches the more recent codegen from clang that avoids emitting a 512
// bit masked load directly. Codegen will widen 128-bit masked load to 512
// bits on AVX512F only targets.
multiclass avx512_load_scalar_lowering_subreg2<string InstrStr,
                                               AVX512VLVectorVTInfo _,
                                               dag Mask512, dag Mask128,
                                               RegisterClass MaskRC,
                                               SubRegIndex subreg> {
// AVX512F patterns.
def : Pat<(_.info128.VT (extract_subvector
                         (_.info512.VT (masked_load addr:$srcAddr, Mask512,
                                        _.info512.ImmAllZerosV)),
                           (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmkz)
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

def : Pat<(_.info128.VT (extract_subvector
                (_.info512.VT (masked_load addr:$srcAddr, Mask512,
                      (_.info512.VT (insert_subvector undef,
                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
                            (iPTR 0))))),
                (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

// AVX512VL patterns.
// Native 128-bit masked_load with an all-zeros passthrough selects rmkz.
def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
                         _.info128.ImmAllZerosV)),
          (!cast<Instruction>(InstrStr#rmkz)
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

// Native 128-bit masked_load with an X86vzmovl($src) passthrough selects rmk.
def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
                         (_.info128.VT (X86vzmovl _.info128.RC:$src)))),
          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;
}

// Instantiate the lowering multiclasses for VMOVSSZ (f32) and VMOVSDZ (f64).
// Each mask dag below takes bit 0 of a GPR ($mask AND 1) and reinterprets it
// as the vXi1 mask type the masked load/store node carries.
defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;

defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;

defm : avx512_store_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (insert_subvector
                           (v16i1 immAllZerosV),
                           (v4i1 (extract_subvector
                                  (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                                  (iPTR 0))),
                           (iPTR 0))),
                   (v4i1 (extract_subvector
                          (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                          (iPTR 0))), GR8, sub_8bit>;
defm : avx512_store_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1
                    (extract_subvector
                     (v16i1
                      (insert_subvector
                       (v16i1 immAllZerosV),
                       (v2i1 (extract_subvector
                              (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                              (iPTR 0))),
                       (iPTR 0))),
                     (iPTR 0))),
                   (v2i1 (extract_subvector
                          (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                          (iPTR 0))), GR8, sub_8bit>;

defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;

defm : avx512_load_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (insert_subvector
                           (v16i1 immAllZerosV),
                           (v4i1 (extract_subvector
                                  (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                                  (iPTR 0))),
                           (iPTR 0))),
                   (v4i1 (extract_subvector
                          (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                          (iPTR 0))), GR8, sub_8bit>;
defm : avx512_load_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1
                    (extract_subvector
                     (v16i1
                      (insert_subvector
                       (v16i1 immAllZerosV),
                       (v2i1 (extract_subvector
                              (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                              (iPTR 0))),
                       (iPTR 0))),
                     (iPTR 0))),
                   (v2i1 (extract_subvector
                          (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                          (iPTR 0))), GR8, sub_8bit>;

// Scalar FP select on a VK1WM mask, implemented with the masked vector move:
// both scalars are copied to VR128X, the unmasked first source is
// IMPLICIT_DEF, and the result is copied back to the scalar register class.
def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrk
           (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
           VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
           (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;

def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), fp32imm0)),
          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrkz VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
           (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;

def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrk
           (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
           VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
           (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;

// NOTE(review): the f32 counterpart above uses fp32imm0 and the VMOVSDZ
// lowering defm uses fp64imm0; confirm fpimm0 is the intended leaf here.
def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fpimm0)),
          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrkz VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
           (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;

// Pattern-less opcode-0x11 (MRMDestReg) register-register encodings of
// vmovss/vmovsd. FoldGenData names the corresponding opcode-0x10 instruction.
let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
  def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                           (ins VR128X:$src1, VR128X:$src2),
                           "vmovss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                           []>, XS, EVEX_4V, VEX_LIG,
                           FoldGenData<"VMOVSSZrr">,
                           Sched<[SchedWriteFShuffle.XMM]>;

  let Constraints = "$src0 = $dst" in
  def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                             (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask,
                                                   VR128X:$src1, VR128X:$src2),
                             "vmovss\t{$src2, $src1, $dst {${mask}}|"#
                                        "$dst {${mask}}, $src1, $src2}",
                             []>, EVEX_K, XS, EVEX_4V, VEX_LIG,
                             FoldGenData<"VMOVSSZrrk">,
                             Sched<[SchedWriteFShuffle.XMM]>;

  def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                         (ins f32x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
                         "vmovss\t{$src2, $src1, $dst {${mask}} {z}|"#
                                    "$dst {${mask}} {z}, $src1, $src2}",
                         []>, EVEX_KZ, XS, EVEX_4V, VEX_LIG,
                         FoldGenData<"VMOVSSZrrkz">,
                         Sched<[SchedWriteFShuffle.XMM]>;

  def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                           (ins VR128X:$src1, VR128X:$src2),
                           "vmovsd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                           []>, XD, EVEX_4V, VEX_LIG, VEX_W,
                           FoldGenData<"VMOVSDZrr">,
                           Sched<[SchedWriteFShuffle.XMM]>;

  let Constraints = "$src0 = $dst" in
  def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                             (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask,
                                                   VR128X:$src1, VR128X:$src2),
                             "vmovsd\t{$src2, $src1, $dst {${mask}}|"#
                                        "$dst {${mask}}, $src1, $src2}",
                             []>, EVEX_K, XD, EVEX_4V, VEX_LIG,
                             VEX_W, FoldGenData<"VMOVSDZrrk">,
                             Sched<[SchedWriteFShuffle.XMM]>;

  def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                              (ins f64x_info.KRCWM:$mask, VR128X:$src1,
                                                          VR128X:$src2),
                              "vmovsd\t{$src2, $src1, $dst {${mask}} {z}|"#
                                         "$dst {${mask}} {z}, $src1, $src2}",
                              []>, EVEX_KZ, XD, EVEX_4V, VEX_LIG,
                              VEX_W, FoldGenData<"VMOVSDZrrkz">,
                              Sched<[SchedWriteFShuffle.XMM]>;
}

// The ".s" mnemonic suffix maps each assembly form onto its _REV instruction.
def : InstAlias<"vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VMOVSSZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}}|"#
                             "$dst {${mask}}, $src1, $src2}",
                (VMOVSSZrrk_REV VR128X:$dst, VK1WM:$mask,
                                VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"#
                             "$dst {${mask}} {z}, $src1, $src2}",
                (VMOVSSZrrkz_REV VR128X:$dst, VK1WM:$mask,
                                 VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VMOVSDZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}}|"#
                             "$dst {${mask}}, $src1, $src2}",
                (VMOVSDZrrk_REV VR128X:$dst, VK1WM:$mask,
                                VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"#
                             "$dst {${mask}} {z}, $src1, $src2}",
                (VMOVSDZrrkz_REV VR128X:$dst, VK1WM:$mask,
                                 VR128X:$src1, VR128X:$src2), 0>;

// Under OptForSize, X86vzmovl (keep element 0, zero the rest) is selected as
// VMOVSSZrr with an AVX512_128_SET0 first source.
let Predicates = [HasAVX512, OptForSize] in {
  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
            (VMOVSSZrr (v4f32 (AVX512_128_SET0)), VR128X:$src)>;
  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
            (VMOVSSZrr (v4i32 (AVX512_128_SET0)), VR128X:$src)>;

  // Move low f32 and clear high bits.
  // For 256/512-bit types the low xmm subregister is extracted, VMOVSSZrr is
  // applied against an AVX512_128_SET0 source, and the result is placed back
  // with SUBREG_TO_REG.
  def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
              (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))), sub_xmm)>;
  def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
              (v4i32 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)))), sub_xmm)>;

  def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
              (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))), sub_xmm)>;
  def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
              (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)))), sub_xmm)>;
}

// Use 128-bit blends for OptForSpeed since BLENDs have better throughput than
// VMOVSS/SD. Unfortunately, this loses the ability to use XMM16-31.
// The f32 form blends with immediate 1 (VBLENDPSrri); the i32 form blends
// with immediate 3 (VPBLENDWrri).
let Predicates = [HasAVX512, OptForSpeed] in {
  def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VBLENDPSrri (v4f32 (V_SET0)),
                          (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)),
                          (i8 1))), sub_xmm)>;
  def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VPBLENDWrri (v4i32 (V_SET0)),
                          (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)),
                          (i8 3))), sub_xmm)>;
}

// Scalar FP loads into vectors: scalar_to_vector of a loaded f32/f64 and the
// wider X86vzload32/X86vzload64 forms all select VMOVSSZrm / VMOVSDZrm.
let Predicates = [HasAVX512] in {
  def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
            (VMOVSSZrm addr:$src)>;
  def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
            (VMOVSDZrm addr:$src)>;

  // Represent the same patterns above but in the form they appear for
  // 256-bit types
  def : Pat<(v8f32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v4f64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;

  // Represent the same patterns above but in the form they appear for
  // 512-bit types
  def : Pat<(v16f32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v8f64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
}

// Register-to-register vmovq matching X86vzmovl on v2i64.
let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
                                (ins VR128X:$src),
                                "vmovq\t{$src, $dst|$dst, $src}",
                                [(set VR128X:$dst, (v2i64 (X86vzmovl
                                                   (v2i64 VR128X:$src))))]>,
                                EVEX, VEX_W;
}

let Predicates = [HasAVX512] in {
  // The explicit X86vzmovl around scalar_to_vector is absorbed: the plain
  // GPR-to-XMM move is selected directly.
  def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
            (VMOVDI2PDIZrr GR32:$src)>;

  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
            (VMOV64toPQIZrr GR64:$src)>;

  // AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
            (VMOVDI2PDIZrm addr:$src)>;
  def : Pat<(v4i32 (X86vzload32 addr:$src)),
            (VMOVDI2PDIZrm addr:$src)>;
  def : Pat<(v8i32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
  def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
            (VMOVZPQILo2PQIZrr VR128X:$src)>;
  def : Pat<(v2i64 (X86vzload64 addr:$src)),
            (VMOVQI2PQIZrm addr:$src)>;
  def : Pat<(v4i64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;

  // Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
  def : Pat<(v16i32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
  def : Pat<(v8i64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;

  // X86vzmovl on 64-bit-element 256/512-bit vectors: apply VMOVZPQILo2PQIZrr
  // to the low xmm subregister and place the result back with SUBREG_TO_REG.
  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2f64 (VMOVZPQILo2PQIZrr
                     (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))),
             sub_xmm)>;
  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2i64 (VMOVZPQILo2PQIZrr
                     (v2i64 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)))),
             sub_xmm)>;

  def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2f64 (VMOVZPQILo2PQIZrr
                     (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))),
             sub_xmm)>;
  def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2i64 (VMOVZPQILo2PQIZrr
                     (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)))),
             sub_xmm)>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - Non-temporals
//===----------------------------------------------------------------------===//

// Non-temporal loads. The definitions carry no pattern ([]); selection is done
// by the alignednontemporalload patterns further down.
def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
                      (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
                      [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.ZMM.RM]>,
                      EVEX, T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>;

let Predicates = [HasVLX] in {
  def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
                       (ins i256mem:$src),
                       "vmovntdqa\t{$src, $dst|$dst, $src}",
                       [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.YMM.RM]>,
                       EVEX, T8PD, EVEX_V256, EVEX_CD8<64, CD8VF>;

  def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
                      (ins i128mem:$src),
                      "vmovntdqa\t{$src, $dst|$dst, $src}",
                      [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.XMM.RM]>,
                      EVEX, T8PD, EVEX_V128, EVEX_CD8<64, CD8VF>;
}

// One non-temporal store instruction (record suffix "mr") for vector type _.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic.
//   Sched     - scheduling info; its MR member is used as SchedRW.
//   st_frag   - store fragment to match (defaults to alignednontemporalstore).
multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                        X86SchedWriteMoveLS Sched,
                        PatFrag st_frag = alignednontemporalstore> {
  let SchedRW = [Sched.MR], AddedComplexity = 400 in
  def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(st_frag (_.VT _.RC:$src), addr:$dst)],
                    _.ExeDomain>, EVEX, EVEX_CD8<_.EltSize, CD8VF>;
}

// Instantiates avx512_movnt at 512 bits (HasAVX512) and at 256/128 bits
// (HasAVX512 + HasVLX).
multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
                           AVX512VLVectorVTInfo VTInfo,
                           X86SchedWriteMoveLSWidths Sched> {
  let Predicates = [HasAVX512] in
    defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512, Sched.ZMM>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256, Sched.YMM>, EVEX_V256;
    defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128, Sched.XMM>, EVEX_V128;
  }
}

defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info,
                                SchedWriteVecMoveLSNT>, PD;
defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info,
                                SchedWriteFMoveLSNT>, PD, VEX_W;
defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info,
                                SchedWriteFMoveLSNT>, PS;

// VMOVNTDQ is instantiated for the i64 element type only, so stores of the
// other integer element types are mapped onto it here. All vector types,
// FP included, use VMOVNTDQA for aligned non-temporal loads.
let Predicates = [HasAVX512], AddedComplexity = 400 in {
  def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
  def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
  def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;

  def : Pat<(v8f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v16f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v8i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v16i32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v32i16 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v64i8 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
}

let Predicates = [HasVLX], AddedComplexity = 400 in {
  def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignednontemporalstore (v16i16 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignednontemporalstore (v32i8 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;

  def : Pat<(v4f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v8f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v4i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v8i32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v16i16 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v32i8 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;

  def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignednontemporalstore (v8i16 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignednontemporalstore (v16i8 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;

  def : Pat<(v2f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v4f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v2i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v4i32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v8i16 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v16i8 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - Integer arithmetic
//
// Register-register (rr) and register-memory (rm) forms of a two-operand
// integer op, both built through AVX512_maskable. IsCommutable is forwarded
// to the rr form only.
multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86VectorVTInfo _, X86FoldableSchedWrite sched,
                           bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                    "$src2, $src1", "$src1, $src2",
                    (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                    IsCommutable, IsCommutable>, AVX512BIBase, EVEX_4V,
                    Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2)))>,
                  AVX512BIBase, EVEX_4V,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// avx512_binop_rm plus an embedded-broadcast memory form (rmb): the second
// operand is X86VBroadcast of a scalar load and the assembly string appends
// _.BroadcastStr to the memory operand.
multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
                            bit IsCommutable = 0> :
           avx512_binop_rm<opc, OpcodeStr, OpNode, _, sched, IsCommutable> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                  "${src2}"##_.BroadcastStr##", $src1",
                  "$src1, ${src2}"##_.BroadcastStr,
                  (_.VT (OpNode _.RC:$src1,
                                (X86VBroadcast
                                    (_.ScalarLdFrag addr:$src2))))>,
                  AVX512BIBase, EVEX_4V, EVEX_B,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// Instantiates avx512_binop_rm at 512 bits under `prd`, and at 256/128 bits
// under `prd` + HasVLX.
multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              AVX512VLVectorVTInfo VTInfo,
                              X86SchedWriteWidths sched, Predicate prd,
                              bit IsCommutable = 0> {
  let Predicates = [prd] in
    defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
                             IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256,
                                sched.YMM, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128,
                                sched.XMM, IsCommutable>, EVEX_V128;
  }
}

// Same width expansion as avx512_binop_rm_vl, using the broadcast-capable
// avx512_binop_rmb.
multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               AVX512VLVectorVTInfo VTInfo,
                               X86SchedWriteWidths sched, Predicate prd,
                               bit IsCommutable = 0> {
  let Predicates = [prd] in
    defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
                             IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256,
                                 sched.YMM, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128,
                                 sched.XMM, IsCommutable>, EVEX_V128;
  }
}

// Element-type wrappers. The q (i64) and d (i32) variants use the
// broadcast-capable rmb_vl expansion; the w (i16) variant uses rm_vl.
multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
                                  sched, prd, IsCommutable>,
                                  VEX_W, EVEX_CD8<64, CD8VF>;
}

multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
                                  sched, prd, IsCommutable>, EVEX_CD8<32, CD8VF>;
}

multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info,
                                 sched, prd, IsCommutable>, EVEX_CD8<16, CD8VF>,
                                 VEX_WIG;
}

multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
SDNode OpNode, 4602 X86SchedWriteWidths sched, Predicate prd, 4603 bit IsCommutable = 0> { 4604 defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info, 4605 sched, prd, IsCommutable>, EVEX_CD8<8, CD8VF>, 4606 VEX_WIG; 4607} 4608 4609multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr, 4610 SDNode OpNode, X86SchedWriteWidths sched, 4611 Predicate prd, bit IsCommutable = 0> { 4612 defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, sched, prd, 4613 IsCommutable>; 4614 4615 defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, sched, prd, 4616 IsCommutable>; 4617} 4618 4619multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr, 4620 SDNode OpNode, X86SchedWriteWidths sched, 4621 Predicate prd, bit IsCommutable = 0> { 4622 defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, sched, prd, 4623 IsCommutable>; 4624 4625 defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode, sched, prd, 4626 IsCommutable>; 4627} 4628 4629multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w, 4630 bits<8> opc_d, bits<8> opc_q, 4631 string OpcodeStr, SDNode OpNode, 4632 X86SchedWriteWidths sched, 4633 bit IsCommutable = 0> { 4634 defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, 4635 sched, HasAVX512, IsCommutable>, 4636 avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, 4637 sched, HasBWI, IsCommutable>; 4638} 4639 4640multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr, 4641 X86FoldableSchedWrite sched, 4642 SDNode OpNode,X86VectorVTInfo _Src, 4643 X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct, 4644 bit IsCommutable = 0> { 4645 defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst), 4646 (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr, 4647 "$src2, $src1","$src1, $src2", 4648 (_Dst.VT (OpNode 4649 (_Src.VT _Src.RC:$src1), 4650 (_Src.VT _Src.RC:$src2))), 4651 IsCommutable>, 4652 AVX512BIBase, EVEX_4V, Sched<[sched]>; 4653 defm rm : AVX512_maskable<opc, 
MRMSrcMem, _Dst, (outs _Dst.RC:$dst), 4654 (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr, 4655 "$src2, $src1", "$src1, $src2", 4656 (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), 4657 (_Src.LdFrag addr:$src2)))>, 4658 AVX512BIBase, EVEX_4V, 4659 Sched<[sched.Folded, sched.ReadAfterFold]>; 4660 4661 defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst), 4662 (ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2), 4663 OpcodeStr, 4664 "${src2}"##_Brdct.BroadcastStr##", $src1", 4665 "$src1, ${src2}"##_Brdct.BroadcastStr, 4666 (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert 4667 (_Brdct.VT (X86VBroadcast 4668 (_Brdct.ScalarLdFrag addr:$src2))))))>, 4669 AVX512BIBase, EVEX_4V, EVEX_B, 4670 Sched<[sched.Folded, sched.ReadAfterFold]>; 4671} 4672 4673defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add, 4674 SchedWriteVecALU, 1>; 4675defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub, 4676 SchedWriteVecALU, 0>; 4677defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", saddsat, 4678 SchedWriteVecALU, HasBWI, 1>; 4679defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", ssubsat, 4680 SchedWriteVecALU, HasBWI, 0>; 4681defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", uaddsat, 4682 SchedWriteVecALU, HasBWI, 1>; 4683defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", usubsat, 4684 SchedWriteVecALU, HasBWI, 0>; 4685defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul, 4686 SchedWritePMULLD, HasAVX512, 1>, T8PD; 4687defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul, 4688 SchedWriteVecIMul, HasBWI, 1>; 4689defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul, 4690 SchedWriteVecIMul, HasDQI, 1>, T8PD, 4691 NotEVEX2VEXConvertible; 4692defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SchedWriteVecIMul, 4693 HasBWI, 1>; 4694defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SchedWriteVecIMul, 4695 HasBWI, 1>; 4696defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", 
X86mulhrs, 4697 SchedWriteVecIMul, HasBWI, 1>, T8PD; 4698defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", X86avg, 4699 SchedWriteVecALU, HasBWI, 1>; 4700defm VPMULDQ : avx512_binop_rm_vl_q<0x28, "vpmuldq", X86pmuldq, 4701 SchedWriteVecIMul, HasAVX512, 1>, T8PD; 4702defm VPMULUDQ : avx512_binop_rm_vl_q<0xF4, "vpmuludq", X86pmuludq, 4703 SchedWriteVecIMul, HasAVX512, 1>; 4704 4705multiclass avx512_binop_all<bits<8> opc, string OpcodeStr, 4706 X86SchedWriteWidths sched, 4707 AVX512VLVectorVTInfo _SrcVTInfo, 4708 AVX512VLVectorVTInfo _DstVTInfo, 4709 SDNode OpNode, Predicate prd, bit IsCommutable = 0> { 4710 let Predicates = [prd] in 4711 defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode, 4712 _SrcVTInfo.info512, _DstVTInfo.info512, 4713 v8i64_info, IsCommutable>, 4714 EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W; 4715 let Predicates = [HasVLX, prd] in { 4716 defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode, 4717 _SrcVTInfo.info256, _DstVTInfo.info256, 4718 v4i64x_info, IsCommutable>, 4719 EVEX_V256, EVEX_CD8<64, CD8VF>, VEX_W; 4720 defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode, 4721 _SrcVTInfo.info128, _DstVTInfo.info128, 4722 v2i64x_info, IsCommutable>, 4723 EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_W; 4724 } 4725} 4726 4727defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SchedWriteVecALU, 4728 avx512vl_i8_info, avx512vl_i8_info, 4729 X86multishift, HasVBMI, 0>, T8PD; 4730 4731multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode, 4732 X86VectorVTInfo _Src, X86VectorVTInfo _Dst, 4733 X86FoldableSchedWrite sched> { 4734 defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst), 4735 (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2), 4736 OpcodeStr, 4737 "${src2}"##_Src.BroadcastStr##", $src1", 4738 "$src1, ${src2}"##_Src.BroadcastStr, 4739 (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert 4740 (_Src.VT (X86VBroadcast 4741 (_Src.ScalarLdFrag addr:$src2))))))>, 4742 EVEX_4V, 
EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>, 4743 Sched<[sched.Folded, sched.ReadAfterFold]>; 4744} 4745 4746multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr, 4747 SDNode OpNode,X86VectorVTInfo _Src, 4748 X86VectorVTInfo _Dst, X86FoldableSchedWrite sched, 4749 bit IsCommutable = 0> { 4750 defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst), 4751 (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr, 4752 "$src2, $src1","$src1, $src2", 4753 (_Dst.VT (OpNode 4754 (_Src.VT _Src.RC:$src1), 4755 (_Src.VT _Src.RC:$src2))), 4756 IsCommutable, IsCommutable>, 4757 EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V, Sched<[sched]>; 4758 defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst), 4759 (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr, 4760 "$src2, $src1", "$src1, $src2", 4761 (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), 4762 (_Src.LdFrag addr:$src2)))>, 4763 EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>, 4764 Sched<[sched.Folded, sched.ReadAfterFold]>; 4765} 4766 4767multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr, 4768 SDNode OpNode> { 4769 let Predicates = [HasBWI] in 4770 defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i32_info, 4771 v32i16_info, SchedWriteShuffle.ZMM>, 4772 avx512_packs_rmb<opc, OpcodeStr, OpNode, v16i32_info, 4773 v32i16_info, SchedWriteShuffle.ZMM>, EVEX_V512; 4774 let Predicates = [HasBWI, HasVLX] in { 4775 defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i32x_info, 4776 v16i16x_info, SchedWriteShuffle.YMM>, 4777 avx512_packs_rmb<opc, OpcodeStr, OpNode, v8i32x_info, 4778 v16i16x_info, SchedWriteShuffle.YMM>, 4779 EVEX_V256; 4780 defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v4i32x_info, 4781 v8i16x_info, SchedWriteShuffle.XMM>, 4782 avx512_packs_rmb<opc, OpcodeStr, OpNode, v4i32x_info, 4783 v8i16x_info, SchedWriteShuffle.XMM>, 4784 EVEX_V128; 4785 } 4786} 4787multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr, 4788 SDNode OpNode> { 4789 let Predicates = [HasBWI] in 4790 defm 
NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info, v64i8_info, 4791 SchedWriteShuffle.ZMM>, EVEX_V512, VEX_WIG; 4792 let Predicates = [HasBWI, HasVLX] in { 4793 defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info, 4794 v32i8x_info, SchedWriteShuffle.YMM>, 4795 EVEX_V256, VEX_WIG; 4796 defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info, 4797 v16i8x_info, SchedWriteShuffle.XMM>, 4798 EVEX_V128, VEX_WIG; 4799 } 4800} 4801 4802multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr, 4803 SDNode OpNode, AVX512VLVectorVTInfo _Src, 4804 AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> { 4805 let Predicates = [HasBWI] in 4806 defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512, 4807 _Dst.info512, SchedWriteVecIMul.ZMM, 4808 IsCommutable>, EVEX_V512; 4809 let Predicates = [HasBWI, HasVLX] in { 4810 defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256, 4811 _Dst.info256, SchedWriteVecIMul.YMM, 4812 IsCommutable>, EVEX_V256; 4813 defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128, 4814 _Dst.info128, SchedWriteVecIMul.XMM, 4815 IsCommutable>, EVEX_V128; 4816 } 4817} 4818 4819defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, AVX512BIBase; 4820defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, AVX5128IBase; 4821defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase; 4822defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase; 4823 4824defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw, 4825 avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8PD, VEX_WIG; 4826defm VPMADDWD : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd, 4827 avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase, VEX_WIG; 4828 4829defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax, 4830 SchedWriteVecALU, HasBWI, 1>, T8PD; 4831defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax, 4832 
SchedWriteVecALU, HasBWI, 1>; 4833defm VPMAXSD : avx512_binop_rm_vl_d<0x3D, "vpmaxsd", smax, 4834 SchedWriteVecALU, HasAVX512, 1>, T8PD; 4835defm VPMAXSQ : avx512_binop_rm_vl_q<0x3D, "vpmaxsq", smax, 4836 SchedWriteVecALU, HasAVX512, 1>, T8PD, 4837 NotEVEX2VEXConvertible; 4838 4839defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax, 4840 SchedWriteVecALU, HasBWI, 1>; 4841defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax, 4842 SchedWriteVecALU, HasBWI, 1>, T8PD; 4843defm VPMAXUD : avx512_binop_rm_vl_d<0x3F, "vpmaxud", umax, 4844 SchedWriteVecALU, HasAVX512, 1>, T8PD; 4845defm VPMAXUQ : avx512_binop_rm_vl_q<0x3F, "vpmaxuq", umax, 4846 SchedWriteVecALU, HasAVX512, 1>, T8PD, 4847 NotEVEX2VEXConvertible; 4848 4849defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin, 4850 SchedWriteVecALU, HasBWI, 1>, T8PD; 4851defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin, 4852 SchedWriteVecALU, HasBWI, 1>; 4853defm VPMINSD : avx512_binop_rm_vl_d<0x39, "vpminsd", smin, 4854 SchedWriteVecALU, HasAVX512, 1>, T8PD; 4855defm VPMINSQ : avx512_binop_rm_vl_q<0x39, "vpminsq", smin, 4856 SchedWriteVecALU, HasAVX512, 1>, T8PD, 4857 NotEVEX2VEXConvertible; 4858 4859defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin, 4860 SchedWriteVecALU, HasBWI, 1>; 4861defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin, 4862 SchedWriteVecALU, HasBWI, 1>, T8PD; 4863defm VPMINUD : avx512_binop_rm_vl_d<0x3B, "vpminud", umin, 4864 SchedWriteVecALU, HasAVX512, 1>, T8PD; 4865defm VPMINUQ : avx512_binop_rm_vl_q<0x3B, "vpminuq", umin, 4866 SchedWriteVecALU, HasAVX512, 1>, T8PD, 4867 NotEVEX2VEXConvertible; 4868 4869// PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX. 
4870let Predicates = [HasDQI, NoVLX] in { 4871 def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))), 4872 (EXTRACT_SUBREG 4873 (VPMULLQZrr 4874 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm), 4875 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)), 4876 sub_ymm)>; 4877 4878 def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))), 4879 (EXTRACT_SUBREG 4880 (VPMULLQZrr 4881 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm), 4882 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)), 4883 sub_xmm)>; 4884} 4885 4886// PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX. 4887let Predicates = [HasDQI, NoVLX] in { 4888 def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))), 4889 (EXTRACT_SUBREG 4890 (VPMULLQZrr 4891 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm), 4892 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)), 4893 sub_ymm)>; 4894 4895 def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))), 4896 (EXTRACT_SUBREG 4897 (VPMULLQZrr 4898 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm), 4899 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)), 4900 sub_xmm)>; 4901} 4902 4903multiclass avx512_min_max_lowering<Instruction Instr, SDNode OpNode> { 4904 def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)), 4905 (EXTRACT_SUBREG 4906 (Instr 4907 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm), 4908 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)), 4909 sub_ymm)>; 4910 4911 def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)), 4912 (EXTRACT_SUBREG 4913 (Instr 4914 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm), 4915 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)), 4916 sub_xmm)>; 4917} 4918 4919let Predicates = [HasAVX512, NoVLX] in { 4920 defm : avx512_min_max_lowering<VPMAXUQZrr, umax>; 4921 defm : avx512_min_max_lowering<VPMINUQZrr, umin>; 4922 defm : 
avx512_min_max_lowering<VPMAXSQZrr, smax>; 4923 defm : avx512_min_max_lowering<VPMINSQZrr, smin>; 4924} 4925 4926//===----------------------------------------------------------------------===// 4927// AVX-512 Logical Instructions 4928//===----------------------------------------------------------------------===// 4929 4930defm VPAND : avx512_binop_rm_vl_dq<0xDB, 0xDB, "vpand", and, 4931 SchedWriteVecLogic, HasAVX512, 1>; 4932defm VPOR : avx512_binop_rm_vl_dq<0xEB, 0xEB, "vpor", or, 4933 SchedWriteVecLogic, HasAVX512, 1>; 4934defm VPXOR : avx512_binop_rm_vl_dq<0xEF, 0xEF, "vpxor", xor, 4935 SchedWriteVecLogic, HasAVX512, 1>; 4936defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp, 4937 SchedWriteVecLogic, HasAVX512>; 4938 4939let Predicates = [HasVLX] in { 4940 def : Pat<(v16i8 (and VR128X:$src1, VR128X:$src2)), 4941 (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>; 4942 def : Pat<(v8i16 (and VR128X:$src1, VR128X:$src2)), 4943 (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>; 4944 4945 def : Pat<(v16i8 (or VR128X:$src1, VR128X:$src2)), 4946 (VPORQZ128rr VR128X:$src1, VR128X:$src2)>; 4947 def : Pat<(v8i16 (or VR128X:$src1, VR128X:$src2)), 4948 (VPORQZ128rr VR128X:$src1, VR128X:$src2)>; 4949 4950 def : Pat<(v16i8 (xor VR128X:$src1, VR128X:$src2)), 4951 (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>; 4952 def : Pat<(v8i16 (xor VR128X:$src1, VR128X:$src2)), 4953 (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>; 4954 4955 def : Pat<(v16i8 (X86andnp VR128X:$src1, VR128X:$src2)), 4956 (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>; 4957 def : Pat<(v8i16 (X86andnp VR128X:$src1, VR128X:$src2)), 4958 (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>; 4959 4960 def : Pat<(and VR128X:$src1, (loadv16i8 addr:$src2)), 4961 (VPANDQZ128rm VR128X:$src1, addr:$src2)>; 4962 def : Pat<(and VR128X:$src1, (loadv8i16 addr:$src2)), 4963 (VPANDQZ128rm VR128X:$src1, addr:$src2)>; 4964 4965 def : Pat<(or VR128X:$src1, (loadv16i8 addr:$src2)), 4966 (VPORQZ128rm VR128X:$src1, addr:$src2)>; 4967 def : Pat<(or 
VR128X:$src1, (loadv8i16 addr:$src2)), 4968 (VPORQZ128rm VR128X:$src1, addr:$src2)>; 4969 4970 def : Pat<(xor VR128X:$src1, (loadv16i8 addr:$src2)), 4971 (VPXORQZ128rm VR128X:$src1, addr:$src2)>; 4972 def : Pat<(xor VR128X:$src1, (loadv8i16 addr:$src2)), 4973 (VPXORQZ128rm VR128X:$src1, addr:$src2)>; 4974 4975 def : Pat<(X86andnp VR128X:$src1, (loadv16i8 addr:$src2)), 4976 (VPANDNQZ128rm VR128X:$src1, addr:$src2)>; 4977 def : Pat<(X86andnp VR128X:$src1, (loadv8i16 addr:$src2)), 4978 (VPANDNQZ128rm VR128X:$src1, addr:$src2)>; 4979 4980 def : Pat<(and VR128X:$src1, 4981 (bc_v4i32 (v4f32 (X86VBroadcast (loadf32 addr:$src2))))), 4982 (VPANDDZ128rmb VR128X:$src1, addr:$src2)>; 4983 def : Pat<(or VR128X:$src1, 4984 (bc_v4i32 (v4f32 (X86VBroadcast (loadf32 addr:$src2))))), 4985 (VPORDZ128rmb VR128X:$src1, addr:$src2)>; 4986 def : Pat<(xor VR128X:$src1, 4987 (bc_v4i32 (v4f32 (X86VBroadcast (loadf32 addr:$src2))))), 4988 (VPXORDZ128rmb VR128X:$src1, addr:$src2)>; 4989 def : Pat<(X86andnp VR128X:$src1, 4990 (bc_v4i32 (v4f32 (X86VBroadcast (loadf32 addr:$src2))))), 4991 (VPANDNDZ128rmb VR128X:$src1, addr:$src2)>; 4992 4993 def : Pat<(and VR128X:$src1, 4994 (bc_v2i64 (v2f64 (X86VBroadcast (loadf64 addr:$src2))))), 4995 (VPANDQZ128rmb VR128X:$src1, addr:$src2)>; 4996 def : Pat<(or VR128X:$src1, 4997 (bc_v2i64 (v2f64 (X86VBroadcast (loadf64 addr:$src2))))), 4998 (VPORQZ128rmb VR128X:$src1, addr:$src2)>; 4999 def : Pat<(xor VR128X:$src1, 5000 (bc_v2i64 (v2f64 (X86VBroadcast (loadf64 addr:$src2))))), 5001 (VPXORQZ128rmb VR128X:$src1, addr:$src2)>; 5002 def : Pat<(X86andnp VR128X:$src1, 5003 (bc_v2i64 (v2f64 (X86VBroadcast (loadf64 addr:$src2))))), 5004 (VPANDNQZ128rmb VR128X:$src1, addr:$src2)>; 5005 5006 def : Pat<(v32i8 (and VR256X:$src1, VR256X:$src2)), 5007 (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>; 5008 def : Pat<(v16i16 (and VR256X:$src1, VR256X:$src2)), 5009 (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>; 5010 5011 def : Pat<(v32i8 (or VR256X:$src1, VR256X:$src2)), 5012 
(VPORQZ256rr VR256X:$src1, VR256X:$src2)>; 5013 def : Pat<(v16i16 (or VR256X:$src1, VR256X:$src2)), 5014 (VPORQZ256rr VR256X:$src1, VR256X:$src2)>; 5015 5016 def : Pat<(v32i8 (xor VR256X:$src1, VR256X:$src2)), 5017 (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>; 5018 def : Pat<(v16i16 (xor VR256X:$src1, VR256X:$src2)), 5019 (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>; 5020 5021 def : Pat<(v32i8 (X86andnp VR256X:$src1, VR256X:$src2)), 5022 (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>; 5023 def : Pat<(v16i16 (X86andnp VR256X:$src1, VR256X:$src2)), 5024 (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>; 5025 5026 def : Pat<(and VR256X:$src1, (loadv32i8 addr:$src2)), 5027 (VPANDQZ256rm VR256X:$src1, addr:$src2)>; 5028 def : Pat<(and VR256X:$src1, (loadv16i16 addr:$src2)), 5029 (VPANDQZ256rm VR256X:$src1, addr:$src2)>; 5030 5031 def : Pat<(or VR256X:$src1, (loadv32i8 addr:$src2)), 5032 (VPORQZ256rm VR256X:$src1, addr:$src2)>; 5033 def : Pat<(or VR256X:$src1, (loadv16i16 addr:$src2)), 5034 (VPORQZ256rm VR256X:$src1, addr:$src2)>; 5035 5036 def : Pat<(xor VR256X:$src1, (loadv32i8 addr:$src2)), 5037 (VPXORQZ256rm VR256X:$src1, addr:$src2)>; 5038 def : Pat<(xor VR256X:$src1, (loadv16i16 addr:$src2)), 5039 (VPXORQZ256rm VR256X:$src1, addr:$src2)>; 5040 5041 def : Pat<(X86andnp VR256X:$src1, (loadv32i8 addr:$src2)), 5042 (VPANDNQZ256rm VR256X:$src1, addr:$src2)>; 5043 def : Pat<(X86andnp VR256X:$src1, (loadv16i16 addr:$src2)), 5044 (VPANDNQZ256rm VR256X:$src1, addr:$src2)>; 5045 5046 def : Pat<(and VR256X:$src1, 5047 (bc_v8i32 (v8f32 (X86VBroadcast (loadf32 addr:$src2))))), 5048 (VPANDDZ256rmb VR256X:$src1, addr:$src2)>; 5049 def : Pat<(or VR256X:$src1, 5050 (bc_v8i32 (v8f32 (X86VBroadcast (loadf32 addr:$src2))))), 5051 (VPORDZ256rmb VR256X:$src1, addr:$src2)>; 5052 def : Pat<(xor VR256X:$src1, 5053 (bc_v8i32 (v8f32 (X86VBroadcast (loadf32 addr:$src2))))), 5054 (VPXORDZ256rmb VR256X:$src1, addr:$src2)>; 5055 def : Pat<(X86andnp VR256X:$src1, 5056 (bc_v8i32 (v8f32 (X86VBroadcast (loadf32 
addr:$src2))))), 5057 (VPANDNDZ256rmb VR256X:$src1, addr:$src2)>; 5058 5059 def : Pat<(and VR256X:$src1, 5060 (bc_v4i64 (v4f64 (X86VBroadcast (loadf64 addr:$src2))))), 5061 (VPANDQZ256rmb VR256X:$src1, addr:$src2)>; 5062 def : Pat<(or VR256X:$src1, 5063 (bc_v4i64 (v4f64 (X86VBroadcast (loadf64 addr:$src2))))), 5064 (VPORQZ256rmb VR256X:$src1, addr:$src2)>; 5065 def : Pat<(xor VR256X:$src1, 5066 (bc_v4i64 (v4f64 (X86VBroadcast (loadf64 addr:$src2))))), 5067 (VPXORQZ256rmb VR256X:$src1, addr:$src2)>; 5068 def : Pat<(X86andnp VR256X:$src1, 5069 (bc_v4i64 (v4f64 (X86VBroadcast (loadf64 addr:$src2))))), 5070 (VPANDNQZ256rmb VR256X:$src1, addr:$src2)>; 5071} 5072 5073let Predicates = [HasAVX512] in { 5074 def : Pat<(v64i8 (and VR512:$src1, VR512:$src2)), 5075 (VPANDQZrr VR512:$src1, VR512:$src2)>; 5076 def : Pat<(v32i16 (and VR512:$src1, VR512:$src2)), 5077 (VPANDQZrr VR512:$src1, VR512:$src2)>; 5078 5079 def : Pat<(v64i8 (or VR512:$src1, VR512:$src2)), 5080 (VPORQZrr VR512:$src1, VR512:$src2)>; 5081 def : Pat<(v32i16 (or VR512:$src1, VR512:$src2)), 5082 (VPORQZrr VR512:$src1, VR512:$src2)>; 5083 5084 def : Pat<(v64i8 (xor VR512:$src1, VR512:$src2)), 5085 (VPXORQZrr VR512:$src1, VR512:$src2)>; 5086 def : Pat<(v32i16 (xor VR512:$src1, VR512:$src2)), 5087 (VPXORQZrr VR512:$src1, VR512:$src2)>; 5088 5089 def : Pat<(v64i8 (X86andnp VR512:$src1, VR512:$src2)), 5090 (VPANDNQZrr VR512:$src1, VR512:$src2)>; 5091 def : Pat<(v32i16 (X86andnp VR512:$src1, VR512:$src2)), 5092 (VPANDNQZrr VR512:$src1, VR512:$src2)>; 5093 5094 def : Pat<(and VR512:$src1, (loadv64i8 addr:$src2)), 5095 (VPANDQZrm VR512:$src1, addr:$src2)>; 5096 def : Pat<(and VR512:$src1, (loadv32i16 addr:$src2)), 5097 (VPANDQZrm VR512:$src1, addr:$src2)>; 5098 5099 def : Pat<(or VR512:$src1, (loadv64i8 addr:$src2)), 5100 (VPORQZrm VR512:$src1, addr:$src2)>; 5101 def : Pat<(or VR512:$src1, (loadv32i16 addr:$src2)), 5102 (VPORQZrm VR512:$src1, addr:$src2)>; 5103 5104 def : Pat<(xor VR512:$src1, (loadv64i8 addr:$src2)), 
5105 (VPXORQZrm VR512:$src1, addr:$src2)>; 5106 def : Pat<(xor VR512:$src1, (loadv32i16 addr:$src2)), 5107 (VPXORQZrm VR512:$src1, addr:$src2)>; 5108 5109 def : Pat<(X86andnp VR512:$src1, (loadv64i8 addr:$src2)), 5110 (VPANDNQZrm VR512:$src1, addr:$src2)>; 5111 def : Pat<(X86andnp VR512:$src1, (loadv32i16 addr:$src2)), 5112 (VPANDNQZrm VR512:$src1, addr:$src2)>; 5113 5114 def : Pat<(and VR512:$src1, 5115 (bc_v16i32 (v16f32 (X86VBroadcast (loadf32 addr:$src2))))), 5116 (VPANDDZrmb VR512:$src1, addr:$src2)>; 5117 def : Pat<(or VR512:$src1, 5118 (bc_v16i32 (v16f32 (X86VBroadcast (loadf32 addr:$src2))))), 5119 (VPORDZrmb VR512:$src1, addr:$src2)>; 5120 def : Pat<(xor VR512:$src1, 5121 (bc_v16i32 (v16f32 (X86VBroadcast (loadf32 addr:$src2))))), 5122 (VPXORDZrmb VR512:$src1, addr:$src2)>; 5123 def : Pat<(X86andnp VR512:$src1, 5124 (bc_v16i32 (v16f32 (X86VBroadcast (loadf32 addr:$src2))))), 5125 (VPANDNDZrmb VR512:$src1, addr:$src2)>; 5126 5127 def : Pat<(and VR512:$src1, 5128 (bc_v8i64 (v8f64 (X86VBroadcast (loadf64 addr:$src2))))), 5129 (VPANDQZrmb VR512:$src1, addr:$src2)>; 5130 def : Pat<(or VR512:$src1, 5131 (bc_v8i64 (v8f64 (X86VBroadcast (loadf64 addr:$src2))))), 5132 (VPORQZrmb VR512:$src1, addr:$src2)>; 5133 def : Pat<(xor VR512:$src1, 5134 (bc_v8i64 (v8f64 (X86VBroadcast (loadf64 addr:$src2))))), 5135 (VPXORQZrmb VR512:$src1, addr:$src2)>; 5136 def : Pat<(X86andnp VR512:$src1, 5137 (bc_v8i64 (v8f64 (X86VBroadcast (loadf64 addr:$src2))))), 5138 (VPANDNQZrmb VR512:$src1, addr:$src2)>; 5139} 5140 5141// Patterns to catch vselect with different type than logic op. 5142multiclass avx512_logical_lowering<string InstrStr, SDNode OpNode, 5143 X86VectorVTInfo _, 5144 X86VectorVTInfo IntInfo> { 5145 // Masked register-register logical operations. 
5146 def : Pat<(_.VT (vselect _.KRCWM:$mask, 5147 (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))), 5148 _.RC:$src0)), 5149 (!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask, 5150 _.RC:$src1, _.RC:$src2)>; 5151 5152 def : Pat<(_.VT (vselect _.KRCWM:$mask, 5153 (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))), 5154 _.ImmAllZerosV)), 5155 (!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1, 5156 _.RC:$src2)>; 5157 5158 // Masked register-memory logical operations. 5159 def : Pat<(_.VT (vselect _.KRCWM:$mask, 5160 (bitconvert (IntInfo.VT (OpNode _.RC:$src1, 5161 (load addr:$src2)))), 5162 _.RC:$src0)), 5163 (!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask, 5164 _.RC:$src1, addr:$src2)>; 5165 def : Pat<(_.VT (vselect _.KRCWM:$mask, 5166 (bitconvert (IntInfo.VT (OpNode _.RC:$src1, 5167 (load addr:$src2)))), 5168 _.ImmAllZerosV)), 5169 (!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1, 5170 addr:$src2)>; 5171} 5172 5173multiclass avx512_logical_lowering_bcast<string InstrStr, SDNode OpNode, 5174 X86VectorVTInfo _, 5175 X86VectorVTInfo IntInfo> { 5176 // Register-broadcast logical operations. 
5177 def : Pat<(IntInfo.VT (OpNode _.RC:$src1, 5178 (bitconvert (_.VT (X86VBroadcast 5179 (_.ScalarLdFrag addr:$src2)))))), 5180 (!cast<Instruction>(InstrStr#rmb) _.RC:$src1, addr:$src2)>; 5181 def : Pat<(_.VT (vselect _.KRCWM:$mask, 5182 (bitconvert 5183 (IntInfo.VT (OpNode _.RC:$src1, 5184 (bitconvert (_.VT 5185 (X86VBroadcast 5186 (_.ScalarLdFrag addr:$src2))))))), 5187 _.RC:$src0)), 5188 (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask, 5189 _.RC:$src1, addr:$src2)>; 5190 def : Pat<(_.VT (vselect _.KRCWM:$mask, 5191 (bitconvert 5192 (IntInfo.VT (OpNode _.RC:$src1, 5193 (bitconvert (_.VT 5194 (X86VBroadcast 5195 (_.ScalarLdFrag addr:$src2))))))), 5196 _.ImmAllZerosV)), 5197 (!cast<Instruction>(InstrStr#rmbkz) _.KRCWM:$mask, 5198 _.RC:$src1, addr:$src2)>; 5199} 5200 5201multiclass avx512_logical_lowering_sizes<string InstrStr, SDNode OpNode, 5202 AVX512VLVectorVTInfo SelectInfo, 5203 AVX512VLVectorVTInfo IntInfo> { 5204let Predicates = [HasVLX] in { 5205 defm : avx512_logical_lowering<InstrStr#"Z128", OpNode, SelectInfo.info128, 5206 IntInfo.info128>; 5207 defm : avx512_logical_lowering<InstrStr#"Z256", OpNode, SelectInfo.info256, 5208 IntInfo.info256>; 5209} 5210let Predicates = [HasAVX512] in { 5211 defm : avx512_logical_lowering<InstrStr#"Z", OpNode, SelectInfo.info512, 5212 IntInfo.info512>; 5213} 5214} 5215 5216multiclass avx512_logical_lowering_sizes_bcast<string InstrStr, SDNode OpNode, 5217 AVX512VLVectorVTInfo SelectInfo, 5218 AVX512VLVectorVTInfo IntInfo> { 5219let Predicates = [HasVLX] in { 5220 defm : avx512_logical_lowering_bcast<InstrStr#"Z128", OpNode, 5221 SelectInfo.info128, IntInfo.info128>; 5222 defm : avx512_logical_lowering_bcast<InstrStr#"Z256", OpNode, 5223 SelectInfo.info256, IntInfo.info256>; 5224} 5225let Predicates = [HasAVX512] in { 5226 defm : avx512_logical_lowering_bcast<InstrStr#"Z", OpNode, 5227 SelectInfo.info512, IntInfo.info512>; 5228} 5229} 5230 5231multiclass avx512_logical_lowering_types<string InstrStr, SDNode 
OpNode> { 5232 // i64 vselect with i32/i16/i8 logic op 5233 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info, 5234 avx512vl_i32_info>; 5235 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info, 5236 avx512vl_i16_info>; 5237 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info, 5238 avx512vl_i8_info>; 5239 5240 // i32 vselect with i64/i16/i8 logic op 5241 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info, 5242 avx512vl_i64_info>; 5243 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info, 5244 avx512vl_i16_info>; 5245 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info, 5246 avx512vl_i8_info>; 5247 5248 // f32 vselect with i64/i32/i16/i8 logic op 5249 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info, 5250 avx512vl_i64_info>; 5251 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info, 5252 avx512vl_i32_info>; 5253 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info, 5254 avx512vl_i16_info>; 5255 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info, 5256 avx512vl_i8_info>; 5257 5258 // f64 vselect with i64/i32/i16/i8 logic op 5259 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info, 5260 avx512vl_i64_info>; 5261 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info, 5262 avx512vl_i32_info>; 5263 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info, 5264 avx512vl_i16_info>; 5265 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info, 5266 avx512vl_i8_info>; 5267 5268 defm : avx512_logical_lowering_sizes_bcast<InstrStr#"D", OpNode, 5269 avx512vl_f32_info, 5270 avx512vl_i32_info>; 5271 defm : avx512_logical_lowering_sizes_bcast<InstrStr#"Q", OpNode, 5272 avx512vl_f64_info, 5273 avx512vl_i64_info>; 5274} 5275 5276defm : 
avx512_logical_lowering_types<"VPAND", and>; 5277defm : avx512_logical_lowering_types<"VPOR", or>; 5278defm : avx512_logical_lowering_types<"VPXOR", xor>; 5279defm : avx512_logical_lowering_types<"VPANDN", X86andnp>; 5280 5281//===----------------------------------------------------------------------===// 5282// AVX-512 FP arithmetic 5283//===----------------------------------------------------------------------===// 5284 5285multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _, 5286 SDNode OpNode, SDNode VecNode, 5287 X86FoldableSchedWrite sched, bit IsCommutable> { 5288 let ExeDomain = _.ExeDomain in { 5289 defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 5290 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 5291 "$src2, $src1", "$src1, $src2", 5292 (_.VT (VecNode _.RC:$src1, _.RC:$src2))>, 5293 Sched<[sched]>; 5294 5295 defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), 5296 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr, 5297 "$src2, $src1", "$src1, $src2", 5298 (_.VT (VecNode _.RC:$src1, 5299 _.ScalarIntMemCPat:$src2))>, 5300 Sched<[sched.Folded, sched.ReadAfterFold]>; 5301 let isCodeGenOnly = 1, Predicates = [HasAVX512] in { 5302 def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst), 5303 (ins _.FRC:$src1, _.FRC:$src2), 5304 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", 5305 [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>, 5306 Sched<[sched]> { 5307 let isCommutable = IsCommutable; 5308 } 5309 def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst), 5310 (ins _.FRC:$src1, _.ScalarMemOp:$src2), 5311 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", 5312 [(set _.FRC:$dst, (OpNode _.FRC:$src1, 5313 (_.ScalarLdFrag addr:$src2)))]>, 5314 Sched<[sched.Folded, sched.ReadAfterFold]>; 5315 } 5316 } 5317} 5318 5319multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo _, 5320 SDNode VecNode, X86FoldableSchedWrite sched, 5321 bit IsCommutable = 0> { 5322 let ExeDomain = 
_.ExeDomain in
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                          (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
                          "$rc, $src2, $src1", "$src1, $src2, $rc",
                          (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                          (i32 timm:$rc))>,
                          EVEX_B, EVEX_RC, Sched<[sched]>;
}

// Scalar FP binary operation with an additional {sae} register form.
// Defines rr_Int / rm_Int (maskable, on _.RC, selected from VecNode),
// codegen-only rr / rm (on _.FRC, selected from OpNode) and rrb_Int
// (maskable, EVEX_B, selected from SaeNode, printed with "{sae}").
multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                                SDNode OpNode, SDNode VecNode, SDNode SaeNode,
                                X86FoldableSchedWrite sched, bit IsCommutable> {
  let ExeDomain = _.ExeDomain in {
  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
                           Sched<[sched]>;

  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (_.VT (VecNode _.RC:$src1,
                                        _.ScalarIntMemCPat:$src2))>,
                         Sched<[sched.Folded, sched.ReadAfterFold]>;

  let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.FRC:$src2),
                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                          [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
                          Sched<[sched]> {
    let isCommutable = IsCommutable;
  }
  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
                         OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                         [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                         (_.ScalarLdFrag addr:$src2)))]>,
                         Sched<[sched.Folded, sched.ReadAfterFold]>;
  }

  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                            "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                            (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
                            EVEX_B, Sched<[sched]>;
  }
}

// Instantiates the "ss" (f32x_info) and "sd" (f64x_info) scalar forms of a
// binary FP operation by combining avx512_fp_scalar with
// avx512_fp_scalar_round (which uses RndNode).
multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                SDNode VecNode, SDNode RndNode,
                                X86SchedWriteSizes sched, bit IsCommutable> {
  defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
                              sched.PS.Scl, IsCommutable>,
             avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, RndNode,
                              sched.PS.Scl, IsCommutable>,
                              XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
  defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
                              sched.PD.Scl, IsCommutable>,
             avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, RndNode,
                              sched.PD.Scl, IsCommutable>,
                              XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
}

// Same "ss"/"sd" instantiation as above, but built on avx512_fp_scalar_sae
// (SaeNode) instead of the rounding-control variant.
multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDNode VecNode, SDNode SaeNode,
                              X86SchedWriteSizes sched, bit IsCommutable> {
  defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode,
                              VecNode, SaeNode, sched.PS.Scl, IsCommutable>,
                              XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
  defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode,
                              VecNode, SaeNode, sched.PD.Scl, IsCommutable>,
                              XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
}
// Scalar arithmetic instantiations. The last template argument is
// IsCommutable: 1 for add/mul, 0 for sub/div/min/max.
defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86fadds, X86faddRnds,
                                 SchedWriteFAddSizes, 1>;
defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmuls, X86fmulRnds,
                                 SchedWriteFMulSizes, 1>;
defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubs, X86fsubRnds,
                                 SchedWriteFAddSizes, 0>;
defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivs, X86fdivRnds,
                                 SchedWriteFDivSizes, 0>;
defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminSAEs,
                               SchedWriteFCmpSizes, 0>;
defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxSAEs,
                               SchedWriteFCmpSizes, 0>;

// MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use
// X86fminc and X86fmaxc instead of X86fmin and X86fmax
// Defines only the codegen-only rr / rm forms on _.FRC; rr is always marked
// isCommutable.
multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
                                    X86VectorVTInfo _, SDNode OpNode,
                                    X86FoldableSchedWrite sched> {
  let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.FRC:$src2),
                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                          [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
                          Sched<[sched]> {
    let isCommutable = 1;
  }
  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
                         OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                         [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                         (_.ScalarLdFrag addr:$src2)))]>,
                         Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
                                         SchedWriteFCmp.Scl>, XS, EVEX_4V,
                                         VEX_LIG, EVEX_CD8<32, CD8VT1>;

defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
                                         SchedWriteFCmp.Scl>, XD, VEX_W, EVEX_4V,
                                         VEX_LIG, EVEX_CD8<64, CD8VT1>;

defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
                                         SchedWriteFCmp.Scl>, XS, EVEX_4V,
                                         VEX_LIG, EVEX_CD8<32, CD8VT1>;

defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
                                         SchedWriteFCmp.Scl>, XD, VEX_W, EVEX_4V,
                                         VEX_LIG, EVEX_CD8<64, CD8VT1>;

// Packed FP binary operation: register (rr), full-vector load (rm) and
// broadcast-from-scalar load (rmb) forms, all maskable. IsKCommutable
// defaults to IsCommutable.
multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
                            bit IsCommutable,
                            bit IsKCommutable = IsCommutable> {
  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1,
_.RC:$src2)), IsCommutable,
                  IsKCommutable, IsKCommutable>,
                  EVEX_4V, Sched<[sched]>;
  let mayLoad = 1 in {
    defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
                    "$src2, $src1", "$src1, $src2",
                    (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
                    EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
    defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                     (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
                     "${src2}"##_.BroadcastStr##", $src1",
                     "$src1, ${src2}"##_.BroadcastStr,
                     (OpNode _.RC:$src1, (_.VT (X86VBroadcast
                                                (_.ScalarLdFrag addr:$src2))))>,
                     EVEX_4V, EVEX_B,
                     Sched<[sched.Folded, sched.ReadAfterFold]>;
    }
  }
}

// Packed FP operation with an explicit rounding-control operand: a single
// maskable register form rrb taking AVX512RC:$rc (EVEX_B + EVEX_RC).
multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr,
                                  SDPatternOperator OpNodeRnd,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr##_.Suffix,
                  "$rc, $src2, $src1", "$src1, $src2, $rc",
                  (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 timm:$rc)))>,
                  EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
}

// Packed FP operation printed with "{sae}": a single maskable register form
// rrb (EVEX_B, no rounding-control operand).
multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr,
                                SDPatternOperator OpNodeSAE,
                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
                  "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                  (_.VT (OpNodeSAE _.RC:$src1, _.RC:$src2))>,
                  EVEX_4V, EVEX_B, Sched<[sched]>;
}

// Instantiates avx512_fp_packed for the PS/PD element types at 512 bits
// (under [prd]) and at 128/256 bits (under [prd, HasVLX]).
// IsPD128Commutable defaults to IsCommutable.
multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                             Predicate prd, X86SchedWriteSizes sched,
                             bit IsCommutable = 0,
                             bit IsPD128Commutable = IsCommutable> {
  let Predicates = [prd] in {
defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v16f32_info,
                              sched.PS.ZMM, IsCommutable>, EVEX_V512, PS,
                              EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f64_info,
                              sched.PD.ZMM, IsCommutable>, EVEX_V512, PD, VEX_W,
                              EVEX_CD8<64, CD8VF>;
  }

  // Define only if AVX512VL feature is present.
  let Predicates = [prd, HasVLX] in {
    defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f32x_info,
                                   sched.PS.XMM, IsCommutable>, EVEX_V128, PS,
                                   EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f32x_info,
                                   sched.PS.YMM, IsCommutable>, EVEX_V256, PS,
                                   EVEX_CD8<32, CD8VF>;
    // PDZ128 is the only size that passes both flags explicitly:
    // IsPD128Commutable as IsCommutable and IsCommutable as IsKCommutable.
    defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v2f64x_info,
                                   sched.PD.XMM, IsPD128Commutable,
                                   IsCommutable>, EVEX_V128, PD, VEX_W,
                                   EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f64x_info,
                                   sched.PD.YMM, IsCommutable>, EVEX_V256, PD, VEX_W,
                                   EVEX_CD8<64, CD8VF>;
  }
}

// 512-bit PS/PD instantiations of the rounding-control form (rrb).
multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
                                   X86SchedWriteSizes sched> {
  defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
                                    v16f32_info>,
                                    EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
                                    v8f64_info>,
                                    EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
}

// 512-bit PS/PD instantiations of the {sae} form (rrb). The parameter is
// named OpNodeRnd here but is forwarded to avx512_fp_sae_packed.
multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
                                 X86SchedWriteSizes sched> {
  defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
                                  v16f32_info>,
                                  EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
                                  v8f64_info>,
                                  EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
}

// Packed arithmetic instantiations.
defm VADD : avx512_fp_binop_p<0x58, "vadd", fadd, HasAVX512,
                              SchedWriteFAddSizes, 1>,
avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>;
defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, HasAVX512,
                              SchedWriteFMulSizes, 1>,
            avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>;
defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub, HasAVX512,
                              SchedWriteFAddSizes>,
            avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>;
defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv, HasAVX512,
                              SchedWriteFDivSizes>,
            avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>;
defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512,
                              SchedWriteFCmpSizes, 0>,
            avx512_fp_binop_p_sae<0x5D, "vmin", X86fminSAE, SchedWriteFCmpSizes>;
defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, HasAVX512,
                              SchedWriteFCmpSizes, 0>,
            avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxSAE, SchedWriteFCmpSizes>;
// Commutable min/max variants (X86fminc / X86fmaxc): same opcodes and
// mnemonics as VMIN/VMAX, marked codegen-only and IsCommutable = 1.
let isCodeGenOnly = 1 in {
  defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, HasAVX512,
                                 SchedWriteFCmpSizes, 1>;
  defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, HasAVX512,
                                 SchedWriteFCmpSizes, 1>;
}
// FP logic instructions, predicated on HasDQI. They are instantiated with
// null_frag, i.e. without a selection pattern operator of their own.
defm VAND  : avx512_fp_binop_p<0x54, "vand", null_frag, HasDQI,
                               SchedWriteFLogicSizes, 1>;
defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, HasDQI,
                               SchedWriteFLogicSizes, 0>;
defm VOR   : avx512_fp_binop_p<0x56, "vor", null_frag, HasDQI,
                               SchedWriteFLogicSizes, 1>;
defm VXOR  : avx512_fp_binop_p<0x57, "vxor", null_frag, HasDQI,
                               SchedWriteFLogicSizes, 1>;

// Packed scalef-style operation: maskable rr, rm (full-vector load) and rmb
// (broadcast load) forms selected from OpNode.
multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
                  EVEX_4V, Sched<[sched]>;
  defm rm:
AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
                  EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
                   "${src2}"##_.BroadcastStr##", $src1",
                   "$src1, ${src2}"##_.BroadcastStr,
                   (OpNode _.RC:$src1, (_.VT (X86VBroadcast
                                              (_.ScalarLdFrag addr:$src2))))>,
                   EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Scalar scalef-style operation: maskable rr and rm forms on _.RC; rm takes
// its second operand through _.IntScalarMemOp / _.ScalarIntMemCPat.
multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
                  Sched<[sched]>;
  defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr##_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (OpNode _.RC:$src1, _.ScalarIntMemCPat:$src2)>,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Instantiates every scalef form: 512-bit packed (with rounding-control
// variants), scalar ss/sd (with rounding-control variants) and, under HasVLX,
// 128/256-bit packed. `opc` encodes the packed forms and `opcScaler` the
// scalar forms.
multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr,
                                X86SchedWriteWidths sched> {
  defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v16f32_info>,
             avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v16f32_info>,
                              EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v8f64_info>,
             avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v8f64_info>,
                              EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
  defm SSZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f32x_info>,
avx512_fp_scalar_round<opcScaler, OpcodeStr##"ss", f32x_info,
                                    X86scalefsRnd, sched.Scl>,
                                    EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
  defm SDZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f64x_info>,
             avx512_fp_scalar_round<opcScaler, OpcodeStr##"sd", f64x_info,
                                    X86scalefsRnd, sched.Scl>,
                                    EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>, VEX_W;

  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v4f32x_info>,
                                   EVEX_V128, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v8f32x_info>,
                                   EVEX_V256, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v2f64x_info>,
                                   EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v4f64x_info>,
                                   EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
  }
}
// Packed forms use opcode 0x2C, scalar forms 0x2D.
defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef",
                                    SchedWriteFAdd>, T8PD, NotEVEX2VEXConvertible;

//===----------------------------------------------------------------------===//
// AVX-512 VPTESTM instructions
//===----------------------------------------------------------------------===//

// Register (rr) and full-vector load (rm) forms of a test instruction that
// writes a mask register (_.KRC). rr is instantiated with a trailing 1 and rm
// without it.
// NOTE(review): the Name parameter is not referenced in the body.
multiclass avx512_vptest<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _,
                         string Name> {
  // NOTE: Patterns are omitted in favor of manual selection in X86ISelDAGToDAG.
  // There are just too many permutations due to commutability and bitcasts.
let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                      "$src2, $src1", "$src1, $src2",
                   (null_frag), (null_frag), 1>,
                   EVEX_4V, Sched<[sched]>;
  let mayLoad = 1 in
  defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                       "$src2, $src1", "$src1, $src2",
                   (null_frag), (null_frag)>,
                   EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Broadcast-load (rmb) form of the test instruction. Like rr/rm it carries no
// selection pattern (null_frag), so mayLoad is set explicitly.
multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
  defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                    "${src2}"##_.BroadcastStr##", $src1",
                    "$src1, ${src2}"##_.BroadcastStr,
                    (null_frag), (null_frag)>,
                    EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// Instantiates rr/rm/rmb at 512 bits (HasAVX512) and at 256/128 bits
// (HasAVX512 + HasVLX) for one element type.
multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr,
                                  X86SchedWriteWidths sched,
                                  AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_vptest<opc, OpcodeStr, sched.ZMM, _.info512, NAME>,
           avx512_vptest_mb<opc, OpcodeStr, sched.ZMM, _.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
  defm Z256 : avx512_vptest<opc, OpcodeStr, sched.YMM, _.info256, NAME>,
              avx512_vptest_mb<opc, OpcodeStr, sched.YMM, _.info256>, EVEX_V256;
  defm Z128 : avx512_vptest<opc, OpcodeStr, sched.XMM, _.info128, NAME>,
              avx512_vptest_mb<opc, OpcodeStr, sched.XMM, _.info128>, EVEX_V128;
  }
}

// Dword ("d") and qword ("q", VEX_W) element variants.
multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr,
                            X86SchedWriteWidths sched> {
  defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", sched,
avx512vl_i32_info>;
  defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", sched,
                                 avx512vl_i64_info>, VEX_W;
}

// Word ("w", VEX_W) and byte ("b") element variants, predicated on HasBWI
// (plus HasVLX for the 128/256-bit sizes). Only avx512_vptest is
// instantiated here, so these have no broadcast (rmb) form.
multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
                            X86SchedWriteWidths sched> {
  let Predicates = [HasBWI] in {
  defm WZ:    avx512_vptest<opc, OpcodeStr#"w", sched.ZMM,
                            v32i16_info, NAME#"W">, EVEX_V512, VEX_W;
  defm BZ:    avx512_vptest<opc, OpcodeStr#"b", sched.ZMM,
                            v64i8_info, NAME#"B">, EVEX_V512;
  }
  let Predicates = [HasVLX, HasBWI] in {

  defm WZ256: avx512_vptest<opc, OpcodeStr#"w", sched.YMM,
                            v16i16x_info, NAME#"W">, EVEX_V256, VEX_W;
  defm WZ128: avx512_vptest<opc, OpcodeStr#"w", sched.XMM,
                            v8i16x_info, NAME#"W">, EVEX_V128, VEX_W;
  defm BZ256: avx512_vptest<opc, OpcodeStr#"b", sched.YMM,
                            v32i8x_info, NAME#"B">, EVEX_V256;
  defm BZ128: avx512_vptest<opc, OpcodeStr#"b", sched.XMM,
                            v16i8x_info, NAME#"B">, EVEX_V128;
  }
}

// Combines the byte/word forms (opc_wb) with the dword/qword forms (opc_dq).
multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr,
                                   X86SchedWriteWidths sched> :
  avx512_vptest_wb<opc_wb, OpcodeStr, sched>,
  avx512_vptest_dq<opc_dq, OpcodeStr, sched>;

// VPTESTM and VPTESTNM share opcodes 0x26/0x27 and differ in the prefix
// mixin (T8PD vs. T8XS).
defm VPTESTM   : avx512_vptest_all_forms<0x26, 0x27, "vptestm",
                                         SchedWriteVecLogic>, T8PD;
defm VPTESTNM  : avx512_vptest_all_forms<0x26, 0x27, "vptestnm",
                                         SchedWriteVecLogic>, T8XS;

//===----------------------------------------------------------------------===//
// AVX-512 Shift instructions
//===----------------------------------------------------------------------===//

// Shift by 8-bit immediate: register-source (ri) and memory-source (mi)
// maskable forms. ImmFormR / ImmFormM supply the instruction formats.
multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
                            string OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, u8imm:$src2), OpcodeStr,
                      "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1, (i8
imm:$src2)))>,
                   Sched<[sched]>;
  defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
                   (ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
                       "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode (_.VT (_.LdFrag addr:$src1)),
                          (i8 imm:$src2)))>,
                   Sched<[sched.Folded]>;
  }
}

// Shift by 8-bit immediate with a broadcast-loaded source (mbi).
multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
                             string OpcodeStr, SDNode OpNode,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
                   (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
      "$src2, ${src1}"##_.BroadcastStr, "${src1}"##_.BroadcastStr##", $src2",
     (_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src1)), (i8 imm:$src2)))>,
     EVEX_B, Sched<[sched.Folded]>;
}

// Shift by a count held in a 128-bit register (rr) or loaded from i128mem
// (rm). SrcVT is the value type used for the count operand.
multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, ValueType SrcVT,
                            X86VectorVTInfo _> {
  // src2 is always 128-bit
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, VR128X:$src2), OpcodeStr,
                      "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2)))>,
                   AVX512BIBase, EVEX_4V, Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, i128mem:$src2), OpcodeStr,
                       "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1, (SrcVT (load addr:$src2))))>,
                   AVX512BIBase,
                   EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Instantiates avx512_shift_rrm at 512 bits ([prd]) and 256/128 bits
// ([prd, HasVLX]). The EVEX_CD8 tuple differs per width: CD8VQ for 512,
// CD8VH for 256 and CD8VF for 128.
multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86SchedWriteWidths sched, ValueType SrcVT,
                              AVX512VLVectorVTInfo VTInfo,
                              Predicate prd> {
  let Predicates = [prd] in
  defm Z    : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.ZMM, SrcVT,
                               VTInfo.info512>, EVEX_V512,
                               EVEX_CD8<VTInfo.info512.EltSize, CD8VQ> ;
  let
Predicates = [prd, HasVLX] in {
  defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.YMM, SrcVT,
                               VTInfo.info256>, EVEX_V256,
                               EVEX_CD8<VTInfo.info256.EltSize, CD8VH>;
  defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.XMM, SrcVT,
                               VTInfo.info128>, EVEX_V128,
                               EVEX_CD8<VTInfo.info128.EltSize, CD8VF>;
  }
}

// Dword / qword / word variants of a shift-by-register-count instruction.
// NotEVEX2VEXConvertibleQ is applied to the qword ("q") variant only; the
// word variant is predicated on HasBWI.
multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
                              string OpcodeStr, SDNode OpNode,
                              X86SchedWriteWidths sched,
                              bit NotEVEX2VEXConvertibleQ = 0> {
  defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, sched, v4i32,
                              avx512vl_i32_info, HasAVX512>;
  let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
  defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, sched, v2i64,
                              avx512vl_i64_info, HasAVX512>, VEX_W;
  defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, sched, v8i16,
                              avx512vl_i16_info, HasBWI>;
}

// Instantiates the immediate-shift forms (ri/mi plus broadcast mbi) at 512
// bits (HasAVX512) and 256/128 bits (HasAVX512 + HasVLX).
multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
                                  string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched,
                                  AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasAVX512] in
  defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              sched.ZMM, VTInfo.info512>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.ZMM,
                               VTInfo.info512>, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in {
  defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              sched.YMM, VTInfo.info256>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.YMM,
                               VTInfo.info256>, EVEX_V256;
  defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              sched.XMM, VTInfo.info128>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.XMM,
                               VTInfo.info128>, EVEX_V128;
  }
}

// Word-element immediate shifts (HasBWI; HasVLX for 128/256 bits). Only
// avx512_shift_rmi is instantiated, so there is no broadcast form.
multiclass avx512_shift_rmi_w<bits<8> opcw, Format ImmFormR, Format ImmFormM,
                              string OpcodeStr, SDNode
OpNode,
                              X86SchedWriteWidths sched> {
  let Predicates = [HasBWI] in
  defm WZ:    avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                               sched.ZMM, v32i16_info>, EVEX_V512, VEX_WIG;
  let Predicates = [HasVLX, HasBWI] in {
  defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                               sched.YMM, v16i16x_info>, EVEX_V256, VEX_WIG;
  defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                               sched.XMM, v8i16x_info>, EVEX_V128, VEX_WIG;
  }
}

// Dword / qword immediate shifts. NotEVEX2VEXConvertibleQ is applied to the
// qword ("q") variant only.
multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq,
                               Format ImmFormR, Format ImmFormM,
                               string OpcodeStr, SDNode OpNode,
                               X86SchedWriteWidths sched,
                               bit NotEVEX2VEXConvertibleQ = 0> {
  defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode,
                                 sched, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
  let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
  defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode,
                                 sched, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
}

// Immediate shifts and rotates. The MRMnr/MRMnm format pair distinguishes
// operations that share an opcode byte.
defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli,
                                 SchedWriteVecShiftImm>,
             avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli,
                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;

defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli,
                                 SchedWriteVecShiftImm>,
             avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli,
                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;

// VPSRA passes NotEVEX2VEXConvertibleQ = 1 for its qword variant.
defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai,
                                 SchedWriteVecShiftImm, 1>,
             avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai,
                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;

defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri,
                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli,
                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;

// Shifts by a count in a 128-bit register/memory operand.
defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl,
                                SchedWriteVecShift>;
defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra,
                                SchedWriteVecShift, 1>;
defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl,
                                SchedWriteVecShift>;

// Use 512bit VPSRA/VPSRAI version to implement v2i64/v4i64 in case NoVLX.
// Each pattern inserts the narrow source into an IMPLICIT_DEF v8i64, runs the
// 512-bit instruction and extracts the matching subregister.
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                 VR128X:$src2)), sub_ymm)>;

  def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                 VR128X:$src2)), sub_xmm)>;

  def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                 imm:$src2)), sub_ymm)>;

  def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                 imm:$src2)), sub_xmm)>;
}

//===-------------------------------------------------------------------===//
// Variable Bit Shifts
//===-------------------------------------------------------------------===//

// Variable shift: both operands use the same register class (_.RC). Defines
// maskable register (rr) and full-vector load (rm) forms.
multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                      "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2)))>,
                   AVX5128IBase, EVEX_4V,
Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                       "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1,
                   (_.VT (_.LdFrag addr:$src2))))>,
                   AVX5128IBase, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Variable shift with a broadcast-loaded second operand (rmb).
multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                    "${src2}"##_.BroadcastStr##", $src1",
                    "$src1, ${src2}"##_.BroadcastStr,
                    (_.VT (OpNode _.RC:$src1, (_.VT (X86VBroadcast
                                                (_.ScalarLdFrag addr:$src2)))))>,
                    AVX5128IBase, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// Instantiates rr/rm/rmb at 512 bits (HasAVX512) and 256/128 bits
// (HasAVX512 + HasVLX) for one element type.
multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
           avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
  defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256;
  defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128;
  }
}

// Dword ("d") and qword ("q", VEX_W) element variants.
multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr,
                                  SDNode OpNode, X86SchedWriteWidths sched> {
  defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode, sched,
                                 avx512vl_i32_info>;
  defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode, sched,
avx512vl_i64_info>, VEX_W;
}

// Use 512bit version to implement 128/256 bit in case NoVLX.
// Both operands are inserted into IMPLICIT_DEF 512-bit values, the
// OpcodeStr#"Zrr" instruction is applied and the original-width subregister
// is extracted. The output dags name VR256X / VR128X directly rather than
// _.info256.RC / _.info128.RC.
multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr,
                                     SDNode OpNode, list<Predicate> p> {
  let Predicates = p in {
  def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1),
                                  (_.info256.VT _.info256.RC:$src2))),
            (EXTRACT_SUBREG
                (!cast<Instruction>(OpcodeStr#"Zrr")
                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
             sub_ymm)>;

  def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1),
                                  (_.info128.VT _.info128.RC:$src2))),
            (EXTRACT_SUBREG
                (!cast<Instruction>(OpcodeStr#"Zrr")
                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
             sub_xmm)>;
  }
}
// Word-element variable shifts (HasBWI; HasVLX for 128/256 bits). Only
// avx512_var_shift is instantiated, so there is no broadcast form.
multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
                              SDNode OpNode, X86SchedWriteWidths sched> {
  let Predicates = [HasBWI] in
  defm WZ:    avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v32i16_info>,
              EVEX_V512, VEX_W;
  let Predicates = [HasVLX, HasBWI] in {

  defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v16i16x_info>,
              EVEX_V256, VEX_W;
  defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v8i16x_info>,
              EVEX_V128, VEX_W;
  }
}

// Variable shift instantiations: d/q forms plus the word form, which uses a
// different opcode.
defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", X86vshlv, SchedWriteVarVecShift>,
              avx512_var_shift_w<0x12, "vpsllvw", X86vshlv, SchedWriteVarVecShift>;

defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", X86vsrav, SchedWriteVarVecShift>,
              avx512_var_shift_w<0x11, "vpsravw", X86vsrav, SchedWriteVarVecShift>;

defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", X86vsrlv, SchedWriteVarVecShift>,
              avx512_var_shift_w<0x10, "vpsrlvw", X86vsrlv,
SchedWriteVarVecShift>;

// Variable rotates are selected from the generic rotr / rotl nodes and have
// only d/q forms.
defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SchedWriteVarVecShift>;
defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SchedWriteVarVecShift>;

// NoVLX widening patterns for the variable shifts listed here.
defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", X86vsrav, [HasAVX512, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", X86vshlv, [HasBWI, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", X86vsrav, [HasBWI, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", X86vsrlv, [HasBWI, NoVLX]>;


// Use 512bit VPROL/VPROLI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                       (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
                       sub_xmm)>;
  def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                       (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
                       sub_ymm)>;

  def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPROLVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                        (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
                        sub_xmm)>;
  def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPROLVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                        (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
                        sub_ymm)>;

  // Rotate-left by immediate.
  def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLQZri
                (v8i64 (INSERT_SUBREG
(IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6069 imm:$src2)), sub_xmm)>; 6070 def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 imm:$src2))), 6071 (EXTRACT_SUBREG (v8i64 6072 (VPROLQZri 6073 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6074 imm:$src2)), sub_ymm)>; 6075 6076 def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 imm:$src2))), 6077 (EXTRACT_SUBREG (v16i32 6078 (VPROLDZri 6079 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6080 imm:$src2)), sub_xmm)>; 6081 def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 imm:$src2))), 6082 (EXTRACT_SUBREG (v16i32 6083 (VPROLDZri 6084 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6085 imm:$src2)), sub_ymm)>; 6086} 6087 6088// Use 512bit VPROR/VPRORI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX. 6089let Predicates = [HasAVX512, NoVLX] in { 6090 def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))), 6091 (EXTRACT_SUBREG (v8i64 6092 (VPRORVQZrr 6093 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6094 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))), 6095 sub_xmm)>; 6096 def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))), 6097 (EXTRACT_SUBREG (v8i64 6098 (VPRORVQZrr 6099 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6100 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))), 6101 sub_ymm)>; 6102 6103 def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))), 6104 (EXTRACT_SUBREG (v16i32 6105 (VPRORVDZrr 6106 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6107 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))), 6108 sub_xmm)>; 6109 def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))), 6110 (EXTRACT_SUBREG (v16i32 6111 (VPRORVDZrr 6112 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6113 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))), 6114 sub_ymm)>; 6115 6116 def : Pat<(v2i64 
(X86vrotri (v2i64 VR128X:$src1), (i8 imm:$src2))), 6117 (EXTRACT_SUBREG (v8i64 6118 (VPRORQZri 6119 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6120 imm:$src2)), sub_xmm)>; 6121 def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 imm:$src2))), 6122 (EXTRACT_SUBREG (v8i64 6123 (VPRORQZri 6124 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6125 imm:$src2)), sub_ymm)>; 6126 6127 def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 imm:$src2))), 6128 (EXTRACT_SUBREG (v16i32 6129 (VPRORDZri 6130 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6131 imm:$src2)), sub_xmm)>; 6132 def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 imm:$src2))), 6133 (EXTRACT_SUBREG (v16i32 6134 (VPRORDZri 6135 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6136 imm:$src2)), sub_ymm)>; 6137} 6138 6139//===-------------------------------------------------------------------===// 6140// 1-src variable permutation VPERMW/D/Q 6141//===-------------------------------------------------------------------===// 6142 6143multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode, 6144 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> { 6145 let Predicates = [HasAVX512] in 6146 defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>, 6147 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info512>, EVEX_V512; 6148 6149 let Predicates = [HasAVX512, HasVLX] in 6150 defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>, 6151 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info256>, EVEX_V256; 6152} 6153 6154multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM, 6155 string OpcodeStr, SDNode OpNode, 6156 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo> { 6157 let Predicates = [HasAVX512] in 6158 defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, 6159 sched, VTInfo.info512>, 6160 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, 
OpNode, 6161 sched, VTInfo.info512>, EVEX_V512; 6162 let Predicates = [HasAVX512, HasVLX] in 6163 defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, 6164 sched, VTInfo.info256>, 6165 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, 6166 sched, VTInfo.info256>, EVEX_V256; 6167} 6168 6169multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr, 6170 Predicate prd, SDNode OpNode, 6171 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> { 6172 let Predicates = [prd] in 6173 defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>, 6174 EVEX_V512 ; 6175 let Predicates = [HasVLX, prd] in { 6176 defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>, 6177 EVEX_V256 ; 6178 defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info128>, 6179 EVEX_V128 ; 6180 } 6181} 6182 6183defm VPERMW : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv, 6184 WriteVarShuffle256, avx512vl_i16_info>, VEX_W; 6185defm VPERMB : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv, 6186 WriteVarShuffle256, avx512vl_i8_info>; 6187 6188defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv, 6189 WriteVarShuffle256, avx512vl_i32_info>; 6190defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv, 6191 WriteVarShuffle256, avx512vl_i64_info>, VEX_W; 6192defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv, 6193 WriteFVarShuffle256, avx512vl_f32_info>; 6194defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv, 6195 WriteFVarShuffle256, avx512vl_f64_info>, VEX_W; 6196 6197defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq", 6198 X86VPermi, WriteShuffle256, avx512vl_i64_info>, 6199 EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W; 6200defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd", 6201 X86VPermi, WriteFShuffle256, avx512vl_f64_info>, 6202 EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W; 6203 
//===----------------------------------------------------------------------===//
// AVX-512 - VPERMIL
//===----------------------------------------------------------------------===//

// Defines the three maskable variable-control forms of a VPERMIL-style
// instruction for one vector width:
//   rr  - control vector in a register (Ctrl.RC),
//   rm  - control vector loaded from memory via Ctrl.LdFrag,
//   rmb - control built by X86VBroadcast of a scalar loaded via
//         Ctrl.ScalarLdFrag (EVEX_B form).
// `_` describes the data vector type, `Ctrl` describes the control vector
// type.
multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _,
                             X86VectorVTInfo Ctrl> {
  defm rr: AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, Ctrl.RC:$src2), OpcodeStr,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1,
                               (Ctrl.VT Ctrl.RC:$src2)))>,
                  T8PD, EVEX_4V, Sched<[sched]>;
  defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode
                           _.RC:$src1,
                           (Ctrl.VT (Ctrl.LdFrag addr:$src2))))>,
                  T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
  // The assembly strings use the '#' paste operator, matching the spelling
  // used by the other definitions in this file.
  defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                   "${src2}"#_.BroadcastStr#", $src1",
                   "$src1, ${src2}"#_.BroadcastStr,
                   (_.VT (OpNode
                            _.RC:$src1,
                            (Ctrl.VT (X86VBroadcast
                                       (Ctrl.ScalarLdFrag addr:$src2)))))>,
                   T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// Instantiates avx512_permil_vec on X86VPermilpv for every vector width:
// Z (512-bit) under HasAVX512, Z128/Z256 under HasAVX512 + HasVLX.
multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
                                    X86SchedWriteWidths sched,
                                    AVX512VLVectorVTInfo _,
                                    AVX512VLVectorVTInfo Ctrl> {
  let Predicates = [HasAVX512] in {
    defm Z    : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.ZMM,
                                  _.info512, Ctrl.info512>, EVEX_V512;
  }
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.XMM,
                                  _.info128, Ctrl.info128>, EVEX_V128;
    defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.YMM,
                                  _.info256, Ctrl.info256>, EVEX_V256;
  }
}

// Combines both encodings of one VPERMIL mnemonic under the same NAME:
// the variable-control forms (opcode OpcVar, X86VPermilpv) and the
// avx512_shift_rmi_sizes forms selected on X86VPermilpi (opcode OpcImm).
multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
                         AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl> {
  defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, SchedWriteFVarShuffle,
                                      _, Ctrl>;
  defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
                                    X86VPermilpi, SchedWriteFShuffle, _>,
                    EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
}

// The control vector is the integer vector type with the same element size
// as the data type (i32 for PS, i64 for PD).
let ExeDomain = SSEPackedSingle in
defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info,
                               avx512vl_i32_info>;
let ExeDomain = SSEPackedDouble in
defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
                               avx512vl_i64_info>, VEX_W1X;

//===----------------------------------------------------------------------===//
// AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW
//===----------------------------------------------------------------------===//

// All three share opcode 0x70 and differ in the prefix class
// (AVX512BIi8Base / AVX512XSIi8Base / AVX512XDIi8Base).
defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
                             X86PShufd, SchedWriteShuffle, avx512vl_i32_info>,
                             EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;
defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
                                  X86PShufhw, SchedWriteShuffle>,
                                  EVEX, AVX512XSIi8Base;
defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
                                  X86PShuflw, SchedWriteShuffle>,
                                  EVEX, AVX512XDIi8Base;

//===----------------------------------------------------------------------===//
// AVX-512 - VPSHUFB
//===----------------------------------------------------------------------===//

// Instantiates avx512_var_shift for byte vectors at every width:
// Z (v64i8) under HasBWI, Z256 (v32i8) / Z128 (v16i8) under HasVLX + HasBWI.
multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86SchedWriteWidths sched> {
  let Predicates = [HasBWI] in
  defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v64i8_info>,
                              EVEX_V512;

  let Predicates = [HasVLX, HasBWI] in {
  defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v32i8x_info>,
                              EVEX_V256;
  defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v16i8x_info>,
                              EVEX_V128;
  }
}

defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb,
                                  SchedWriteVarShuffle>, VEX_WIG;

//===----------------------------------------------------------------------===//
// Move Low to High and High to Low packed FP Instructions
//===----------------------------------------------------------------------===//

// Register-register forms only; both select on v4f32 nodes
// (X86Movlhps / X86Movhlps) over VR128X operands.
def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
          (ins VR128X:$src1, VR128X:$src2),
          "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
          [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))]>,
          Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V;
let isCommutable = 1 in
def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
          (ins VR128X:$src1, VR128X:$src2),
          "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
          [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))]>,
          Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V, NotMemoryFoldable;

//===----------------------------------------------------------------------===//
// VMOVHPS/PD VMOVLPS Instructions
// All patterns were taken from the SSE implementation.
6323//===----------------------------------------------------------------------===// 6324 6325multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr, 6326 SDPatternOperator OpNode, 6327 X86VectorVTInfo _> { 6328 let hasSideEffects = 0, mayLoad = 1, ExeDomain = _.ExeDomain in 6329 def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst), 6330 (ins _.RC:$src1, f64mem:$src2), 6331 !strconcat(OpcodeStr, 6332 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 6333 [(set _.RC:$dst, 6334 (OpNode _.RC:$src1, 6335 (_.VT (bitconvert 6336 (v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))]>, 6337 Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>, EVEX_4V; 6338} 6339 6340// No patterns for MOVLPS/MOVHPS as the Movlhps node should only be created in 6341// SSE1. And MOVLPS pattern is even more complex. 6342defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", null_frag, 6343 v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS; 6344defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl, 6345 v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W; 6346defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", null_frag, 6347 v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS; 6348defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movsd, 6349 v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W; 6350 6351let Predicates = [HasAVX512] in { 6352 // VMOVHPD patterns 6353 def : Pat<(v2f64 (X86Unpckl VR128X:$src1, 6354 (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))), 6355 (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>; 6356 def : Pat<(v2f64 (X86Unpckl VR128X:$src1, (X86vzload64 addr:$src2))), 6357 (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>; 6358 6359 // VMOVLPD patterns 6360 def : Pat<(v2f64 (X86Movsd VR128X:$src1, (X86vzload64 addr:$src2))), 6361 (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>; 6362} 6363 6364let SchedRW = [WriteFStore] in { 6365let mayStore = 1, hasSideEffects = 0 in 6366def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs), 6367 (ins f64mem:$dst, 
VR128X:$src), 6368 "vmovhps\t{$src, $dst|$dst, $src}", 6369 []>, EVEX, EVEX_CD8<32, CD8VT2>; 6370def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs), 6371 (ins f64mem:$dst, VR128X:$src), 6372 "vmovhpd\t{$src, $dst|$dst, $src}", 6373 [(store (f64 (extractelt 6374 (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)), 6375 (iPTR 0))), addr:$dst)]>, 6376 EVEX, EVEX_CD8<64, CD8VT1>, VEX_W; 6377let mayStore = 1, hasSideEffects = 0 in 6378def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs), 6379 (ins f64mem:$dst, VR128X:$src), 6380 "vmovlps\t{$src, $dst|$dst, $src}", 6381 []>, EVEX, EVEX_CD8<32, CD8VT2>; 6382def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs), 6383 (ins f64mem:$dst, VR128X:$src), 6384 "vmovlpd\t{$src, $dst|$dst, $src}", 6385 [(store (f64 (extractelt (v2f64 VR128X:$src), 6386 (iPTR 0))), addr:$dst)]>, 6387 EVEX, EVEX_CD8<64, CD8VT1>, VEX_W; 6388} // SchedRW 6389 6390let Predicates = [HasAVX512] in { 6391 // VMOVHPD patterns 6392 def : Pat<(store (f64 (extractelt 6393 (v2f64 (X86VPermilpi VR128X:$src, (i8 1))), 6394 (iPTR 0))), addr:$dst), 6395 (VMOVHPDZ128mr addr:$dst, VR128X:$src)>; 6396} 6397//===----------------------------------------------------------------------===// 6398// FMA - Fused Multiply Operations 6399// 6400 6401multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, 6402 X86FoldableSchedWrite sched, 6403 X86VectorVTInfo _, string Suff> { 6404 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in { 6405 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), 6406 (ins _.RC:$src2, _.RC:$src3), 6407 OpcodeStr, "$src3, $src2", "$src2, $src3", 6408 (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>, 6409 AVX512FMA3Base, Sched<[sched]>; 6410 6411 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 6412 (ins _.RC:$src2, _.MemOp:$src3), 6413 OpcodeStr, "$src3, $src2", "$src2, $src3", 6414 (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>, 6415 
AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>; 6416 6417 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 6418 (ins _.RC:$src2, _.ScalarMemOp:$src3), 6419 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), 6420 !strconcat("$src2, ${src3}", _.BroadcastStr ), 6421 (OpNode _.RC:$src2, 6422 _.RC:$src1,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3)))), 1, 0>, 6423 AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 6424 } 6425} 6426 6427multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode, 6428 X86FoldableSchedWrite sched, 6429 X86VectorVTInfo _, string Suff> { 6430 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in 6431 defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), 6432 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc), 6433 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", 6434 (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))), 1, 1>, 6435 AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>; 6436} 6437 6438multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDNode OpNode, 6439 SDNode OpNodeRnd, X86SchedWriteWidths sched, 6440 AVX512VLVectorVTInfo _, string Suff> { 6441 let Predicates = [HasAVX512] in { 6442 defm Z : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.ZMM, 6443 _.info512, Suff>, 6444 avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM, 6445 _.info512, Suff>, 6446 EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>; 6447 } 6448 let Predicates = [HasVLX, HasAVX512] in { 6449 defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.YMM, 6450 _.info256, Suff>, 6451 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>; 6452 defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.XMM, 6453 _.info128, Suff>, 6454 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>; 6455 } 6456} 6457 6458multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDNode OpNode, 6459 SDNode OpNodeRnd> { 6460 defm PS : 
avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd, 6461 SchedWriteFMA, avx512vl_f32_info, "PS">; 6462 defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd, 6463 SchedWriteFMA, avx512vl_f64_info, "PD">, 6464 VEX_W; 6465} 6466 6467defm VFMADD213 : avx512_fma3p_213_f<0xA8, "vfmadd213", X86Fmadd, X86FmaddRnd>; 6468defm VFMSUB213 : avx512_fma3p_213_f<0xAA, "vfmsub213", X86Fmsub, X86FmsubRnd>; 6469defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub, X86FmaddsubRnd>; 6470defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd, X86FmsubaddRnd>; 6471defm VFNMADD213 : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86Fnmadd, X86FnmaddRnd>; 6472defm VFNMSUB213 : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86Fnmsub, X86FnmsubRnd>; 6473 6474 6475multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, 6476 X86FoldableSchedWrite sched, 6477 X86VectorVTInfo _, string Suff> { 6478 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in { 6479 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), 6480 (ins _.RC:$src2, _.RC:$src3), 6481 OpcodeStr, "$src3, $src2", "$src2, $src3", 6482 (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1, 6483 vselect, 1>, AVX512FMA3Base, Sched<[sched]>; 6484 6485 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 6486 (ins _.RC:$src2, _.MemOp:$src3), 6487 OpcodeStr, "$src3, $src2", "$src2, $src3", 6488 (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>, 6489 AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>; 6490 6491 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 6492 (ins _.RC:$src2, _.ScalarMemOp:$src3), 6493 OpcodeStr, "${src3}"##_.BroadcastStr##", $src2", 6494 "$src2, ${src3}"##_.BroadcastStr, 6495 (_.VT (OpNode _.RC:$src2, 6496 (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))), 6497 _.RC:$src1)), 1, 0>, AVX512FMA3Base, EVEX_B, 6498 Sched<[sched.Folded, 
sched.ReadAfterFold]>; 6499 } 6500} 6501 6502multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode, 6503 X86FoldableSchedWrite sched, 6504 X86VectorVTInfo _, string Suff> { 6505 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in 6506 defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), 6507 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc), 6508 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", 6509 (_.VT ( OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc))), 6510 1, 1, vselect, 1>, 6511 AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>; 6512} 6513 6514multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDNode OpNode, 6515 SDNode OpNodeRnd, X86SchedWriteWidths sched, 6516 AVX512VLVectorVTInfo _, string Suff> { 6517 let Predicates = [HasAVX512] in { 6518 defm Z : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.ZMM, 6519 _.info512, Suff>, 6520 avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM, 6521 _.info512, Suff>, 6522 EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>; 6523 } 6524 let Predicates = [HasVLX, HasAVX512] in { 6525 defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.YMM, 6526 _.info256, Suff>, 6527 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>; 6528 defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.XMM, 6529 _.info128, Suff>, 6530 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>; 6531 } 6532} 6533 6534multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDNode OpNode, 6535 SDNode OpNodeRnd > { 6536 defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd, 6537 SchedWriteFMA, avx512vl_f32_info, "PS">; 6538 defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd, 6539 SchedWriteFMA, avx512vl_f64_info, "PD">, 6540 VEX_W; 6541} 6542 6543defm VFMADD231 : avx512_fma3p_231_f<0xB8, "vfmadd231", X86Fmadd, X86FmaddRnd>; 6544defm VFMSUB231 : avx512_fma3p_231_f<0xBA, "vfmsub231", X86Fmsub, X86FmsubRnd>; 6545defm 
VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub, X86FmaddsubRnd>; 6546defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd, X86FmsubaddRnd>; 6547defm VFNMADD231 : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86Fnmadd, X86FnmaddRnd>; 6548defm VFNMSUB231 : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86Fnmsub, X86FnmsubRnd>; 6549 6550multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, 6551 X86FoldableSchedWrite sched, 6552 X86VectorVTInfo _, string Suff> { 6553 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in { 6554 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), 6555 (ins _.RC:$src2, _.RC:$src3), 6556 OpcodeStr, "$src3, $src2", "$src2, $src3", 6557 (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1, vselect, 1>, 6558 AVX512FMA3Base, Sched<[sched]>; 6559 6560 // Pattern is 312 order so that the load is in a different place from the 6561 // 213 and 231 patterns this helps tablegen's duplicate pattern detection. 6562 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 6563 (ins _.RC:$src2, _.MemOp:$src3), 6564 OpcodeStr, "$src3, $src2", "$src2, $src3", 6565 (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>, 6566 AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>; 6567 6568 // Pattern is 312 order so that the load is in a different place from the 6569 // 213 and 231 patterns this helps tablegen's duplicate pattern detection. 
6570 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 6571 (ins _.RC:$src2, _.ScalarMemOp:$src3), 6572 OpcodeStr, "${src3}"##_.BroadcastStr##", $src2", 6573 "$src2, ${src3}"##_.BroadcastStr, 6574 (_.VT (OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))), 6575 _.RC:$src1, _.RC:$src2)), 1, 0>, 6576 AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 6577 } 6578} 6579 6580multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode, 6581 X86FoldableSchedWrite sched, 6582 X86VectorVTInfo _, string Suff> { 6583 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in 6584 defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), 6585 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc), 6586 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", 6587 (_.VT ( OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 timm:$rc))), 6588 1, 1, vselect, 1>, 6589 AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>; 6590} 6591 6592multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDNode OpNode, 6593 SDNode OpNodeRnd, X86SchedWriteWidths sched, 6594 AVX512VLVectorVTInfo _, string Suff> { 6595 let Predicates = [HasAVX512] in { 6596 defm Z : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.ZMM, 6597 _.info512, Suff>, 6598 avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM, 6599 _.info512, Suff>, 6600 EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>; 6601 } 6602 let Predicates = [HasVLX, HasAVX512] in { 6603 defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.YMM, 6604 _.info256, Suff>, 6605 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>; 6606 defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.XMM, 6607 _.info128, Suff>, 6608 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>; 6609 } 6610} 6611 6612multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDNode OpNode, 6613 SDNode OpNodeRnd > { 6614 defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd, 6615 
SchedWriteFMA, avx512vl_f32_info, "PS">; 6616 defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd, 6617 SchedWriteFMA, avx512vl_f64_info, "PD">, 6618 VEX_W; 6619} 6620 6621defm VFMADD132 : avx512_fma3p_132_f<0x98, "vfmadd132", X86Fmadd, X86FmaddRnd>; 6622defm VFMSUB132 : avx512_fma3p_132_f<0x9A, "vfmsub132", X86Fmsub, X86FmsubRnd>; 6623defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub, X86FmaddsubRnd>; 6624defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd, X86FmsubaddRnd>; 6625defm VFNMADD132 : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86Fnmadd, X86FnmaddRnd>; 6626defm VFNMSUB132 : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86Fnmsub, X86FnmsubRnd>; 6627 6628// Scalar FMA 6629multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, 6630 dag RHS_r, dag RHS_m, dag RHS_b, bit MaskOnlyReg> { 6631let Constraints = "$src1 = $dst", hasSideEffects = 0 in { 6632 defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 6633 (ins _.RC:$src2, _.RC:$src3), OpcodeStr, 6634 "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>, 6635 AVX512FMA3Base, Sched<[SchedWriteFMA.Scl]>; 6636 6637 let mayLoad = 1 in 6638 defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), 6639 (ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr, 6640 "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>, 6641 AVX512FMA3Base, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>; 6642 6643 defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 6644 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc), 6645 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (null_frag), 1, 1>, 6646 AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>; 6647 6648 let isCodeGenOnly = 1, isCommutable = 1 in { 6649 def r : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst), 6650 (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3), 6651 !strconcat(OpcodeStr, 6652 "\t{$src3, $src2, 
$dst|$dst, $src2, $src3}"), 6653 !if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>; 6654 def m : AVX512FMA3S<opc, MRMSrcMem, (outs _.FRC:$dst), 6655 (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3), 6656 !strconcat(OpcodeStr, 6657 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 6658 [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>; 6659 6660 def rb : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst), 6661 (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3, AVX512RC:$rc), 6662 !strconcat(OpcodeStr, 6663 "\t{$rc, $src3, $src2, $dst|$dst, $src2, $src3, $rc}"), 6664 !if(MaskOnlyReg, [], [RHS_b])>, EVEX_B, EVEX_RC, 6665 Sched<[SchedWriteFMA.Scl]>; 6666 }// isCodeGenOnly = 1 6667}// Constraints = "$src1 = $dst" 6668} 6669 6670multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132, 6671 string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd, 6672 X86VectorVTInfo _, string SUFF> { 6673 let ExeDomain = _.ExeDomain in { 6674 defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix, _, 6675 // Operands for intrinsic are in 123 order to preserve passthu 6676 // semantics. 6677 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1, 6678 _.FRC:$src3))), 6679 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1, 6680 (_.ScalarLdFrag addr:$src3)))), 6681 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src1, 6682 _.FRC:$src3, (i32 timm:$rc)))), 0>; 6683 6684 defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix, _, 6685 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3, 6686 _.FRC:$src1))), 6687 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, 6688 (_.ScalarLdFrag addr:$src3), _.FRC:$src1))), 6689 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src3, 6690 _.FRC:$src1, (i32 timm:$rc)))), 1>; 6691 6692 // One pattern is 312 order so that the load is in a different place from the 6693 // 213 and 231 patterns this helps tablegen's duplicate pattern detection. 
6694 defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix, _, 6695 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3, 6696 _.FRC:$src2))), 6697 (set _.FRC:$dst, (_.EltVT (OpNode (_.ScalarLdFrag addr:$src3), 6698 _.FRC:$src1, _.FRC:$src2))), 6699 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src1, _.FRC:$src3, 6700 _.FRC:$src2, (i32 timm:$rc)))), 1>; 6701 } 6702} 6703 6704multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132, 6705 string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd> { 6706 let Predicates = [HasAVX512] in { 6707 defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode, 6708 OpNodeRnd, f32x_info, "SS">, 6709 EVEX_CD8<32, CD8VT1>, VEX_LIG; 6710 defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode, 6711 OpNodeRnd, f64x_info, "SD">, 6712 EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W; 6713 } 6714} 6715 6716defm VFMADD : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", X86Fmadd, X86FmaddRnd>; 6717defm VFMSUB : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86Fmsub, X86FmsubRnd>; 6718defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86Fnmadd, X86FnmaddRnd>; 6719defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86Fnmsub, X86FnmsubRnd>; 6720 6721multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix, 6722 string Suffix, SDNode Move, 6723 X86VectorVTInfo _, PatLeaf ZeroFP> { 6724 let Predicates = [HasAVX512] in { 6725 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 6726 (Op _.FRC:$src2, 6727 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), 6728 _.FRC:$src3))))), 6729 (!cast<I>(Prefix#"213"#Suffix#"Zr_Int") 6730 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), 6731 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>; 6732 6733 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 6734 (Op _.FRC:$src2, _.FRC:$src3, 6735 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))), 6736 (!cast<I>(Prefix#"231"#Suffix#"Zr_Int") 6737 
VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), 6738 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>; 6739 6740 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 6741 (Op _.FRC:$src2, 6742 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), 6743 (_.ScalarLdFrag addr:$src3)))))), 6744 (!cast<I>(Prefix#"213"#Suffix#"Zm_Int") 6745 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), 6746 addr:$src3)>; 6747 6748 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 6749 (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), 6750 (_.ScalarLdFrag addr:$src3), _.FRC:$src2))))), 6751 (!cast<I>(Prefix#"132"#Suffix#"Zm_Int") 6752 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), 6753 addr:$src3)>; 6754 6755 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 6756 (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3), 6757 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))), 6758 (!cast<I>(Prefix#"231"#Suffix#"Zm_Int") 6759 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), 6760 addr:$src3)>; 6761 6762 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 6763 (X86selects VK1WM:$mask, 6764 (Op _.FRC:$src2, 6765 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), 6766 _.FRC:$src3), 6767 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))), 6768 (!cast<I>(Prefix#"213"#Suffix#"Zr_Intk") 6769 VR128X:$src1, VK1WM:$mask, 6770 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), 6771 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>; 6772 6773 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 6774 (X86selects VK1WM:$mask, 6775 (Op _.FRC:$src2, 6776 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), 6777 (_.ScalarLdFrag addr:$src3)), 6778 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))), 6779 (!cast<I>(Prefix#"213"#Suffix#"Zm_Intk") 6780 VR128X:$src1, VK1WM:$mask, 6781 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>; 6782 6783 def : Pat<(_.VT 
(Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 6784 (X86selects VK1WM:$mask, 6785 (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), 6786 (_.ScalarLdFrag addr:$src3), _.FRC:$src2), 6787 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))), 6788 (!cast<I>(Prefix#"132"#Suffix#"Zm_Intk") 6789 VR128X:$src1, VK1WM:$mask, 6790 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>; 6791 6792 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 6793 (X86selects VK1WM:$mask, 6794 (Op _.FRC:$src2, _.FRC:$src3, 6795 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))), 6796 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))), 6797 (!cast<I>(Prefix#"231"#Suffix#"Zr_Intk") 6798 VR128X:$src1, VK1WM:$mask, 6799 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), 6800 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>; 6801 6802 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 6803 (X86selects VK1WM:$mask, 6804 (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3), 6805 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))), 6806 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))), 6807 (!cast<I>(Prefix#"231"#Suffix#"Zm_Intk") 6808 VR128X:$src1, VK1WM:$mask, 6809 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>; 6810 6811 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 6812 (X86selects VK1WM:$mask, 6813 (Op _.FRC:$src2, 6814 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), 6815 _.FRC:$src3), 6816 (_.EltVT ZeroFP)))))), 6817 (!cast<I>(Prefix#"213"#Suffix#"Zr_Intkz") 6818 VR128X:$src1, VK1WM:$mask, 6819 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), 6820 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>; 6821 6822 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 6823 (X86selects VK1WM:$mask, 6824 (Op _.FRC:$src2, _.FRC:$src3, 6825 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))), 6826 (_.EltVT ZeroFP)))))), 6827 (!cast<I>(Prefix#"231"#Suffix#"Zr_Intkz") 6828 VR128X:$src1, 
VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    // Zero-masked patterns whose third instruction operand is a scalar load.
    // X86selects chooses between the Op result and ZeroFP under $mask, and the
    // selected scalar is combined with $src1 through Move. $src2 is copied
    // into VR128X because the *_Int instructions take vector register
    // operands.

    // 213 form: source DAG is Op($src2, elt0($src1), load($src3)).
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects VK1WM:$mask,
                 (Op _.FRC:$src2,
                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                     (_.ScalarLdFrag addr:$src3)),
                 (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zm_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    // 132 form: source DAG is Op(elt0($src1), load($src3), $src2).
    // The load must be the second Op operand and $src2 the third, exactly as
    // in the 132 Zm_Int and Zm_Intk patterns of this multiclass. With the two
    // swapped, the source DAG is only a commuted copy of the 213 pattern
    // above and would select the 132 instruction for a 213-shaped expression.
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects VK1WM:$mask,
                 (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                     (_.ScalarLdFrag addr:$src3), _.FRC:$src2),
                 (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"132"#Suffix#"Zm_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    // 231 form: source DAG is Op($src2, load($src3), elt0($src1)).
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects VK1WM:$mask,
                 (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
                 (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zm_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    // Patterns with rounding mode.
6861 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 6862 (RndOp _.FRC:$src2, 6863 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), 6864 _.FRC:$src3, (i32 timm:$rc)))))), 6865 (!cast<I>(Prefix#"213"#Suffix#"Zrb_Int") 6866 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), 6867 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>; 6868 6869 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 6870 (RndOp _.FRC:$src2, _.FRC:$src3, 6871 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), 6872 (i32 timm:$rc)))))), 6873 (!cast<I>(Prefix#"231"#Suffix#"Zrb_Int") 6874 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), 6875 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>; 6876 6877 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 6878 (X86selects VK1WM:$mask, 6879 (RndOp _.FRC:$src2, 6880 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), 6881 _.FRC:$src3, (i32 timm:$rc)), 6882 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))), 6883 (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intk") 6884 VR128X:$src1, VK1WM:$mask, 6885 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), 6886 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>; 6887 6888 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 6889 (X86selects VK1WM:$mask, 6890 (RndOp _.FRC:$src2, _.FRC:$src3, 6891 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), 6892 (i32 timm:$rc)), 6893 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))), 6894 (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intk") 6895 VR128X:$src1, VK1WM:$mask, 6896 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), 6897 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>; 6898 6899 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 6900 (X86selects VK1WM:$mask, 6901 (RndOp _.FRC:$src2, 6902 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), 6903 _.FRC:$src3, (i32 timm:$rc)), 6904 (_.EltVT ZeroFP)))))), 6905 
(!cast<I>(Prefix#"213"#Suffix#"Zrb_Intkz") 6906 VR128X:$src1, VK1WM:$mask, 6907 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), 6908 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>; 6909 6910 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 6911 (X86selects VK1WM:$mask, 6912 (RndOp _.FRC:$src2, _.FRC:$src3, 6913 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), 6914 (i32 timm:$rc)), 6915 (_.EltVT ZeroFP)))))), 6916 (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intkz") 6917 VR128X:$src1, VK1WM:$mask, 6918 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), 6919 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>; 6920 } 6921} 6922 6923defm : avx512_scalar_fma_patterns<X86Fmadd, X86FmaddRnd, "VFMADD", "SS", 6924 X86Movss, v4f32x_info, fp32imm0>; 6925defm : avx512_scalar_fma_patterns<X86Fmsub, X86FmsubRnd, "VFMSUB", "SS", 6926 X86Movss, v4f32x_info, fp32imm0>; 6927defm : avx512_scalar_fma_patterns<X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SS", 6928 X86Movss, v4f32x_info, fp32imm0>; 6929defm : avx512_scalar_fma_patterns<X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SS", 6930 X86Movss, v4f32x_info, fp32imm0>; 6931 6932defm : avx512_scalar_fma_patterns<X86Fmadd, X86FmaddRnd, "VFMADD", "SD", 6933 X86Movsd, v2f64x_info, fp64imm0>; 6934defm : avx512_scalar_fma_patterns<X86Fmsub, X86FmsubRnd, "VFMSUB", "SD", 6935 X86Movsd, v2f64x_info, fp64imm0>; 6936defm : avx512_scalar_fma_patterns<X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SD", 6937 X86Movsd, v2f64x_info, fp64imm0>; 6938defm : avx512_scalar_fma_patterns<X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SD", 6939 X86Movsd, v2f64x_info, fp64imm0>; 6940 6941//===----------------------------------------------------------------------===// 6942// AVX-512 Packed Multiply of Unsigned 52-bit Integers and Add the Low 52-bit IFMA 6943//===----------------------------------------------------------------------===// 6944let Constraints = "$src1 = $dst" in { 6945multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, 6946 
X86FoldableSchedWrite sched, X86VectorVTInfo _> { 6947 // NOTE: The SDNode have the multiply operands first with the add last. 6948 // This enables commuted load patterns to be autogenerated by tablegen. 6949 let ExeDomain = _.ExeDomain in { 6950 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), 6951 (ins _.RC:$src2, _.RC:$src3), 6952 OpcodeStr, "$src3, $src2", "$src2, $src3", 6953 (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>, 6954 AVX512FMA3Base, Sched<[sched]>; 6955 6956 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 6957 (ins _.RC:$src2, _.MemOp:$src3), 6958 OpcodeStr, "$src3, $src2", "$src2, $src3", 6959 (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>, 6960 AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>; 6961 6962 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 6963 (ins _.RC:$src2, _.ScalarMemOp:$src3), 6964 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), 6965 !strconcat("$src2, ${src3}", _.BroadcastStr ), 6966 (OpNode _.RC:$src2, 6967 (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))), 6968 _.RC:$src1)>, 6969 AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 6970 } 6971} 6972} // Constraints = "$src1 = $dst" 6973 6974multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode, 6975 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> { 6976 let Predicates = [HasIFMA] in { 6977 defm Z : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, 6978 EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>; 6979 } 6980 let Predicates = [HasVLX, HasIFMA] in { 6981 defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, 6982 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>; 6983 defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, 6984 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>; 6985 } 6986} 6987 6988defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l, 6989 
SchedWriteVecIMul, avx512vl_i64_info>, 6990 VEX_W; 6991defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h, 6992 SchedWriteVecIMul, avx512vl_i64_info>, 6993 VEX_W; 6994 6995//===----------------------------------------------------------------------===// 6996// AVX-512 Scalar convert from sign integer to float/double 6997//===----------------------------------------------------------------------===// 6998 6999multiclass avx512_vcvtsi<bits<8> opc, SDPatternOperator OpNode, X86FoldableSchedWrite sched, 7000 RegisterClass SrcRC, X86VectorVTInfo DstVT, 7001 X86MemOperand x86memop, PatFrag ld_frag, string asm, 7002 string mem> { 7003 let hasSideEffects = 0, isCodeGenOnly = 1 in { 7004 def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst), 7005 (ins DstVT.FRC:$src1, SrcRC:$src), 7006 !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>, 7007 EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>; 7008 let mayLoad = 1 in 7009 def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst), 7010 (ins DstVT.FRC:$src1, x86memop:$src), 7011 asm#"{"#mem#"}\t{$src, $src1, $dst|$dst, $src1, $src}", []>, 7012 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>; 7013 } // hasSideEffects = 0 7014 def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), 7015 (ins DstVT.RC:$src1, SrcRC:$src2), 7016 !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 7017 [(set DstVT.RC:$dst, 7018 (OpNode (DstVT.VT DstVT.RC:$src1), SrcRC:$src2))]>, 7019 EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>; 7020 7021 def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), 7022 (ins DstVT.RC:$src1, x86memop:$src2), 7023 asm#"{"#mem#"}\t{$src2, $src1, $dst|$dst, $src1, $src2}", 7024 [(set DstVT.RC:$dst, 7025 (OpNode (DstVT.VT DstVT.RC:$src1), 7026 (ld_frag addr:$src2)))]>, 7027 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>; 7028 def : InstAlias<"v"#asm#mem#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", 7029 (!cast<Instruction>(NAME#"rr_Int") DstVT.RC:$dst, 7030 DstVT.RC:$src1, SrcRC:$src2), 
0, "att">; 7031} 7032 7033multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode, 7034 X86FoldableSchedWrite sched, RegisterClass SrcRC, 7035 X86VectorVTInfo DstVT, string asm, 7036 string mem> { 7037 def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), 7038 (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc), 7039 !strconcat(asm, 7040 "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"), 7041 [(set DstVT.RC:$dst, 7042 (OpNode (DstVT.VT DstVT.RC:$src1), 7043 SrcRC:$src2, 7044 (i32 timm:$rc)))]>, 7045 EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched, ReadDefault, ReadInt2Fpu]>; 7046 def : InstAlias<"v"#asm#mem#"\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}", 7047 (!cast<Instruction>(NAME#"rrb_Int") DstVT.RC:$dst, 7048 DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc), 0, "att">; 7049} 7050 7051multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode, SDNode OpNodeRnd, 7052 X86FoldableSchedWrite sched, 7053 RegisterClass SrcRC, X86VectorVTInfo DstVT, 7054 X86MemOperand x86memop, PatFrag ld_frag, 7055 string asm, string mem> { 7056 defm NAME : avx512_vcvtsi_round<opc, OpNodeRnd, sched, SrcRC, DstVT, asm, mem>, 7057 avx512_vcvtsi<opc, OpNode, sched, SrcRC, DstVT, x86memop, 7058 ld_frag, asm, mem>, VEX_LIG; 7059} 7060 7061let Predicates = [HasAVX512] in { 7062defm VCVTSI2SSZ : avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd, 7063 WriteCvtI2SS, GR32, 7064 v4f32x_info, i32mem, loadi32, "cvtsi2ss", "l">, 7065 XS, EVEX_CD8<32, CD8VT1>; 7066defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd, 7067 WriteCvtI2SS, GR64, 7068 v4f32x_info, i64mem, loadi64, "cvtsi2ss", "q">, 7069 XS, VEX_W, EVEX_CD8<64, CD8VT1>; 7070defm VCVTSI2SDZ : avx512_vcvtsi<0x2A, null_frag, WriteCvtI2SD, GR32, 7071 v2f64x_info, i32mem, loadi32, "cvtsi2sd", "l">, 7072 XD, VEX_LIG, EVEX_CD8<32, CD8VT1>; 7073defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd, 7074 WriteCvtI2SD, GR64, 7075 v2f64x_info, i64mem, loadi64, "cvtsi2sd", "q">, 7076 XD, VEX_W, EVEX_CD8<64, 
CD8VT1>; 7077 7078def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}", 7079 (VCVTSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">; 7080def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}", 7081 (VCVTSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">; 7082 7083def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))), 7084 (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>; 7085def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))), 7086 (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>; 7087def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))), 7088 (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>; 7089def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))), 7090 (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>; 7091 7092def : Pat<(f32 (sint_to_fp GR32:$src)), 7093 (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>; 7094def : Pat<(f32 (sint_to_fp GR64:$src)), 7095 (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>; 7096def : Pat<(f64 (sint_to_fp GR32:$src)), 7097 (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>; 7098def : Pat<(f64 (sint_to_fp GR64:$src)), 7099 (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>; 7100 7101defm VCVTUSI2SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd, 7102 WriteCvtI2SS, GR32, 7103 v4f32x_info, i32mem, loadi32, 7104 "cvtusi2ss", "l">, XS, EVEX_CD8<32, CD8VT1>; 7105defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd, 7106 WriteCvtI2SS, GR64, 7107 v4f32x_info, i64mem, loadi64, "cvtusi2ss", "q">, 7108 XS, VEX_W, EVEX_CD8<64, CD8VT1>; 7109defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, null_frag, WriteCvtI2SD, GR32, v2f64x_info, 7110 i32mem, loadi32, "cvtusi2sd", "l">, 7111 XD, VEX_LIG, EVEX_CD8<32, CD8VT1>; 7112defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd, 7113 WriteCvtI2SD, GR64, 7114 v2f64x_info, i64mem, loadi64, "cvtusi2sd", "q">, 7115 XD, VEX_W, EVEX_CD8<64, CD8VT1>; 7116 7117def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}", 7118 
(VCVTUSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">; 7119def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}", 7120 (VCVTUSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">; 7121 7122def : Pat<(f32 (uint_to_fp (loadi32 addr:$src))), 7123 (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>; 7124def : Pat<(f32 (uint_to_fp (loadi64 addr:$src))), 7125 (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>; 7126def : Pat<(f64 (uint_to_fp (loadi32 addr:$src))), 7127 (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>; 7128def : Pat<(f64 (uint_to_fp (loadi64 addr:$src))), 7129 (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>; 7130 7131def : Pat<(f32 (uint_to_fp GR32:$src)), 7132 (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>; 7133def : Pat<(f32 (uint_to_fp GR64:$src)), 7134 (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>; 7135def : Pat<(f64 (uint_to_fp GR32:$src)), 7136 (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>; 7137def : Pat<(f64 (uint_to_fp GR64:$src)), 7138 (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>; 7139} 7140 7141//===----------------------------------------------------------------------===// 7142// AVX-512 Scalar convert from float/double to integer 7143//===----------------------------------------------------------------------===// 7144 7145multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT, 7146 X86VectorVTInfo DstVT, SDNode OpNode, 7147 SDNode OpNodeRnd, 7148 X86FoldableSchedWrite sched, string asm, 7149 string aliasStr> { 7150 let Predicates = [HasAVX512] in { 7151 def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src), 7152 !strconcat(asm,"\t{$src, $dst|$dst, $src}"), 7153 [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src)))]>, 7154 EVEX, VEX_LIG, Sched<[sched]>; 7155 def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc), 7156 !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"), 7157 [(set DstVT.RC:$dst, (OpNodeRnd (SrcVT.VT 
SrcVT.RC:$src),(i32 timm:$rc)))]>, 7158 EVEX, VEX_LIG, EVEX_B, EVEX_RC, 7159 Sched<[sched]>; 7160 def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src), 7161 !strconcat(asm,"\t{$src, $dst|$dst, $src}"), 7162 [(set DstVT.RC:$dst, (OpNode 7163 (SrcVT.VT SrcVT.ScalarIntMemCPat:$src)))]>, 7164 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>; 7165 } // Predicates = [HasAVX512] 7166 7167 def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}", 7168 (!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0, "att">; 7169 def : InstAlias<"v" # asm # aliasStr # "\t{$rc, $src, $dst|$dst, $src, $rc}", 7170 (!cast<Instruction>(NAME # "rrb_Int") DstVT.RC:$dst, SrcVT.RC:$src, AVX512RC:$rc), 0, "att">; 7171 def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}", 7172 (!cast<Instruction>(NAME # "rm_Int") DstVT.RC:$dst, 7173 SrcVT.IntScalarMemOp:$src), 0, "att">; 7174} 7175 7176// Convert float/double to signed/unsigned int 32/64 7177defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info,X86cvts2si, 7178 X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{l}">, 7179 XS, EVEX_CD8<32, CD8VT1>; 7180defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info, X86cvts2si, 7181 X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{q}">, 7182 XS, VEX_W, EVEX_CD8<32, CD8VT1>; 7183defm VCVTSS2USIZ: avx512_cvt_s_int_round<0x79, f32x_info, i32x_info, X86cvts2usi, 7184 X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{l}">, 7185 XS, EVEX_CD8<32, CD8VT1>; 7186defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, f32x_info, i64x_info, X86cvts2usi, 7187 X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{q}">, 7188 XS, VEX_W, EVEX_CD8<32, CD8VT1>; 7189defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info, X86cvts2si, 7190 X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{l}">, 7191 XD, EVEX_CD8<64, CD8VT1>; 7192defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info, X86cvts2si, 7193 X86cvts2siRnd, WriteCvtSD2I, 
"cvtsd2si", "{q}">, 7194 XD, VEX_W, EVEX_CD8<64, CD8VT1>; 7195defm VCVTSD2USIZ: avx512_cvt_s_int_round<0x79, f64x_info, i32x_info, X86cvts2usi, 7196 X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{l}">, 7197 XD, EVEX_CD8<64, CD8VT1>; 7198defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info, X86cvts2usi, 7199 X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{q}">, 7200 XD, VEX_W, EVEX_CD8<64, CD8VT1>; 7201 7202// Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang 7203// which produce unnecessary vmovs{s,d} instructions 7204let Predicates = [HasAVX512] in { 7205def : Pat<(v4f32 (X86Movss 7206 (v4f32 VR128X:$dst), 7207 (v4f32 (scalar_to_vector (f32 (sint_to_fp GR64:$src)))))), 7208 (VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>; 7209 7210def : Pat<(v4f32 (X86Movss 7211 (v4f32 VR128X:$dst), 7212 (v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi64 addr:$src))))))), 7213 (VCVTSI642SSZrm_Int VR128X:$dst, addr:$src)>; 7214 7215def : Pat<(v4f32 (X86Movss 7216 (v4f32 VR128X:$dst), 7217 (v4f32 (scalar_to_vector (f32 (sint_to_fp GR32:$src)))))), 7218 (VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>; 7219 7220def : Pat<(v4f32 (X86Movss 7221 (v4f32 VR128X:$dst), 7222 (v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi32 addr:$src))))))), 7223 (VCVTSI2SSZrm_Int VR128X:$dst, addr:$src)>; 7224 7225def : Pat<(v2f64 (X86Movsd 7226 (v2f64 VR128X:$dst), 7227 (v2f64 (scalar_to_vector (f64 (sint_to_fp GR64:$src)))))), 7228 (VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>; 7229 7230def : Pat<(v2f64 (X86Movsd 7231 (v2f64 VR128X:$dst), 7232 (v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi64 addr:$src))))))), 7233 (VCVTSI642SDZrm_Int VR128X:$dst, addr:$src)>; 7234 7235def : Pat<(v2f64 (X86Movsd 7236 (v2f64 VR128X:$dst), 7237 (v2f64 (scalar_to_vector (f64 (sint_to_fp GR32:$src)))))), 7238 (VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>; 7239 7240def : Pat<(v2f64 (X86Movsd 7241 (v2f64 VR128X:$dst), 7242 (v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi32 addr:$src))))))), 7243 
(VCVTSI2SDZrm_Int VR128X:$dst, addr:$src)>; 7244 7245def : Pat<(v4f32 (X86Movss 7246 (v4f32 VR128X:$dst), 7247 (v4f32 (scalar_to_vector (f32 (uint_to_fp GR64:$src)))))), 7248 (VCVTUSI642SSZrr_Int VR128X:$dst, GR64:$src)>; 7249 7250def : Pat<(v4f32 (X86Movss 7251 (v4f32 VR128X:$dst), 7252 (v4f32 (scalar_to_vector (f32 (uint_to_fp (loadi64 addr:$src))))))), 7253 (VCVTUSI642SSZrm_Int VR128X:$dst, addr:$src)>; 7254 7255def : Pat<(v4f32 (X86Movss 7256 (v4f32 VR128X:$dst), 7257 (v4f32 (scalar_to_vector (f32 (uint_to_fp GR32:$src)))))), 7258 (VCVTUSI2SSZrr_Int VR128X:$dst, GR32:$src)>; 7259 7260def : Pat<(v4f32 (X86Movss 7261 (v4f32 VR128X:$dst), 7262 (v4f32 (scalar_to_vector (f32 (uint_to_fp (loadi32 addr:$src))))))), 7263 (VCVTUSI2SSZrm_Int VR128X:$dst, addr:$src)>; 7264 7265def : Pat<(v2f64 (X86Movsd 7266 (v2f64 VR128X:$dst), 7267 (v2f64 (scalar_to_vector (f64 (uint_to_fp GR64:$src)))))), 7268 (VCVTUSI642SDZrr_Int VR128X:$dst, GR64:$src)>; 7269 7270def : Pat<(v2f64 (X86Movsd 7271 (v2f64 VR128X:$dst), 7272 (v2f64 (scalar_to_vector (f64 (uint_to_fp (loadi64 addr:$src))))))), 7273 (VCVTUSI642SDZrm_Int VR128X:$dst, addr:$src)>; 7274 7275def : Pat<(v2f64 (X86Movsd 7276 (v2f64 VR128X:$dst), 7277 (v2f64 (scalar_to_vector (f64 (uint_to_fp GR32:$src)))))), 7278 (VCVTUSI2SDZrr_Int VR128X:$dst, GR32:$src)>; 7279 7280def : Pat<(v2f64 (X86Movsd 7281 (v2f64 VR128X:$dst), 7282 (v2f64 (scalar_to_vector (f64 (uint_to_fp (loadi32 addr:$src))))))), 7283 (VCVTUSI2SDZrm_Int VR128X:$dst, addr:$src)>; 7284} // Predicates = [HasAVX512] 7285 7286// Convert float/double to signed/unsigned int 32/64 with truncation 7287multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC, 7288 X86VectorVTInfo _DstRC, SDNode OpNode, 7289 SDNode OpNodeInt, SDNode OpNodeSAE, 7290 X86FoldableSchedWrite sched, string aliasStr>{ 7291let Predicates = [HasAVX512] in { 7292 let isCodeGenOnly = 1 in { 7293 def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src), 7294 
!strconcat(asm,"\t{$src, $dst|$dst, $src}"), 7295 [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>, 7296 EVEX, VEX_LIG, Sched<[sched]>; 7297 def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src), 7298 !strconcat(asm,"\t{$src, $dst|$dst, $src}"), 7299 [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>, 7300 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>; 7301 } 7302 7303 def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src), 7304 !strconcat(asm,"\t{$src, $dst|$dst, $src}"), 7305 [(set _DstRC.RC:$dst, (OpNodeInt (_SrcRC.VT _SrcRC.RC:$src)))]>, 7306 EVEX, VEX_LIG, Sched<[sched]>; 7307 def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src), 7308 !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"), 7309 [(set _DstRC.RC:$dst, (OpNodeSAE (_SrcRC.VT _SrcRC.RC:$src)))]>, 7310 EVEX, VEX_LIG, EVEX_B, Sched<[sched]>; 7311 def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), 7312 (ins _SrcRC.IntScalarMemOp:$src), 7313 !strconcat(asm,"\t{$src, $dst|$dst, $src}"), 7314 [(set _DstRC.RC:$dst, 7315 (OpNodeInt (_SrcRC.VT _SrcRC.ScalarIntMemCPat:$src)))]>, 7316 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>; 7317} //HasAVX512 7318 7319 def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}", 7320 (!cast<Instruction>(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">; 7321 def : InstAlias<asm # aliasStr # "\t{{sae}, $src, $dst|$dst, $src, {sae}}", 7322 (!cast<Instruction>(NAME # "rrb_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">; 7323 def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}", 7324 (!cast<Instruction>(NAME # "rm_Int") _DstRC.RC:$dst, 7325 _SrcRC.IntScalarMemOp:$src), 0, "att">; 7326} 7327 7328defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info, 7329 fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I, 7330 "{l}">, XS, EVEX_CD8<32, CD8VT1>; 7331defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, 
"vcvttss2si", f32x_info, i64x_info, 7332 fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I, 7333 "{q}">, VEX_W, XS, EVEX_CD8<32, CD8VT1>; 7334defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info, 7335 fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I, 7336 "{l}">, XD, EVEX_CD8<64, CD8VT1>; 7337defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info, 7338 fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I, 7339 "{q}">, VEX_W, XD, EVEX_CD8<64, CD8VT1>; 7340 7341defm VCVTTSS2USIZ: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i32x_info, 7342 fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I, 7343 "{l}">, XS, EVEX_CD8<32, CD8VT1>; 7344defm VCVTTSS2USI64Z: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i64x_info, 7345 fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I, 7346 "{q}">, XS,VEX_W, EVEX_CD8<32, CD8VT1>; 7347defm VCVTTSD2USIZ: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i32x_info, 7348 fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I, 7349 "{l}">, XD, EVEX_CD8<64, CD8VT1>; 7350defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i64x_info, 7351 fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I, 7352 "{q}">, XD, VEX_W, EVEX_CD8<64, CD8VT1>; 7353 7354//===----------------------------------------------------------------------===// 7355// AVX-512 Convert from float to double and back 7356//===----------------------------------------------------------------------===// 7357 7358multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, 7359 X86VectorVTInfo _Src, SDNode OpNode, 7360 X86FoldableSchedWrite sched> { 7361 defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 7362 (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr, 7363 "$src2, $src1", "$src1, $src2", 7364 (_.VT (OpNode (_.VT _.RC:$src1), 7365 (_Src.VT _Src.RC:$src2)))>, 7366 EVEX_4V, VEX_LIG, Sched<[sched]>; 7367 defm rm_Int : 
AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), 7368 (ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr, 7369 "$src2, $src1", "$src1, $src2", 7370 (_.VT (OpNode (_.VT _.RC:$src1), 7371 (_Src.VT _Src.ScalarIntMemCPat:$src2)))>, 7372 EVEX_4V, VEX_LIG, 7373 Sched<[sched.Folded, sched.ReadAfterFold]>; 7374 7375 let isCodeGenOnly = 1, hasSideEffects = 0 in { 7376 def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst), 7377 (ins _.FRC:$src1, _Src.FRC:$src2), 7378 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, 7379 EVEX_4V, VEX_LIG, Sched<[sched]>; 7380 let mayLoad = 1 in 7381 def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst), 7382 (ins _.FRC:$src1, _Src.ScalarMemOp:$src2), 7383 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, 7384 EVEX_4V, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>; 7385 } 7386} 7387 7388// Scalar Conversion with SAE - suppress all exceptions 7389multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, 7390 X86VectorVTInfo _Src, SDNode OpNodeSAE, 7391 X86FoldableSchedWrite sched> { 7392 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 7393 (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr, 7394 "{sae}, $src2, $src1", "$src1, $src2, {sae}", 7395 (_.VT (OpNodeSAE (_.VT _.RC:$src1), 7396 (_Src.VT _Src.RC:$src2)))>, 7397 EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>; 7398} 7399 7400// Scalar Conversion with rounding control (RC) 7401multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, 7402 X86VectorVTInfo _Src, SDNode OpNodeRnd, 7403 X86FoldableSchedWrite sched> { 7404 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 7405 (ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr, 7406 "$rc, $src2, $src1", "$src1, $src2, $rc", 7407 (_.VT (OpNodeRnd (_.VT _.RC:$src1), 7408 (_Src.VT _Src.RC:$src2), (i32 timm:$rc)))>, 7409 EVEX_4V, VEX_LIG, Sched<[sched]>, 7410 EVEX_B, EVEX_RC; 7411} 7412multiclass 
avx512_cvt_fp_scalar_sd2ss<bits<8> opc, string OpcodeStr,
                                      SDNode OpNode, SDNode OpNodeRnd,
                                      X86FoldableSchedWrite sched,
                                      X86VectorVTInfo _src, X86VectorVTInfo _dst> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
             avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
                                     OpNodeRnd, sched>, VEX_W, EVEX_CD8<64, CD8VT1>, XD;
  }
}

// Scalar conversion wrapper that pairs avx512_cvt_fp_scalar (fed OpNode) with
// avx512_cvt_fp_sae_scalar (fed OpNodeSAE) under HasAVX512. Note that the
// info arguments are forwarded to both helpers in (_dst, _src) order.
multiclass avx512_cvt_fp_scalar_ss2sd<bits<8> opc, string OpcodeStr,
                                      SDNode OpNode, SDNode OpNodeSAE,
                                      X86FoldableSchedWrite sched,
                                      X86VectorVTInfo _src, X86VectorVTInfo _dst> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
             avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeSAE, sched>,
             EVEX_CD8<32, CD8VT1>, XS;
  }
}
// Instantiate both scalar conversions; they share opcode 0x5A and differ in
// the source/destination info and in the attributes applied by the wrappers.
defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss", X86frounds,
                                            X86froundsRnd, WriteCvtSD2SS, f64x_info,
                                            f32x_info>;
defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd", X86fpexts,
                                            X86fpextsSAE, WriteCvtSS2SD, f32x_info,
                                            f64x_info>;

// Select the scalar instructions for generic fpextend/fpround. The first
// instruction operand is given as IMPLICIT_DEF. The load-folding form is only
// used under OptForSize.
def : Pat<(f64 (fpextend FR32X:$src)),
          (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>,
          Requires<[HasAVX512]>;
def : Pat<(f64 (fpextend (loadf32 addr:$src))),
          (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
          Requires<[HasAVX512, OptForSize]>;

def : Pat<(f32 (fpround FR64X:$src)),
          (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
          Requires<[HasAVX512]>;

// Match "convert element 0 of $src, then X86Movss/X86Movsd the result into
// $dst" and select the _Int register forms, which take both vectors.
def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector
                     (f32 (fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
          (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
          Requires<[HasAVX512]>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector
                     (f64 (fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
          (VCVTSS2SDZrr_Int VR128X:$dst,
           VR128X:$src)>,
          Requires<[HasAVX512]>;

//===----------------------------------------------------------------------===//
// AVX-512 Vector convert from signed/unsigned integer to float/double
//          and from float/double to signed/unsigned integer
//===----------------------------------------------------------------------===//

// Common building block for the packed conversions below.
//   _         - destination vector info (register class, VT, default mask RC).
//   _Src      - source vector info (register class, VT, memory operands,
//               load fragments).
//   OpNode    - node used in the register, load and broadcast patterns.
//   Broadcast - embedded-broadcast string appended to "${src}" in the rmb form.
//   Alias     - suffix appended to OpcodeStr for the rm form only.
//   MemOp     - memory operand of the rm form.
//   MaskRC    - write-mask register class used by all three forms.
//   LdDAG     - pattern of the rm form; defaults to OpNode applied to a full
//               _Src.LdFrag load.
// Produces rr (register), rm (memory) and rmb (broadcast memory, EVEX_B)
// forms, each through AVX512_maskable_common with vselect-based masking.
multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                          X86VectorVTInfo _Src, SDNode OpNode,
                          X86FoldableSchedWrite sched,
                          string Broadcast = _.BroadcastStr,
                          string Alias = "", X86MemOperand MemOp = _Src.MemOp,
                          RegisterClass MaskRC = _.KRCWM,
                          dag LdDAG = (_.VT (OpNode (_Src.VT (_Src.LdFrag addr:$src))))> {

  defm rr : AVX512_maskable_common<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _Src.RC:$src),
                         (ins _.RC:$src0, MaskRC:$mask, _Src.RC:$src),
                         (ins MaskRC:$mask, _Src.RC:$src),
                         OpcodeStr, "$src", "$src",
                         (_.VT (OpNode (_Src.VT _Src.RC:$src))),
                         (vselect MaskRC:$mask,
                                  (_.VT (OpNode (_Src.VT _Src.RC:$src))),
                                  _.RC:$src0),
                         vselect, "$src0 = $dst">,
                         EVEX, Sched<[sched]>;

  defm rm : AVX512_maskable_common<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins MemOp:$src),
                         (ins _.RC:$src0, MaskRC:$mask, MemOp:$src),
                         (ins MaskRC:$mask, MemOp:$src),
                         OpcodeStr#Alias, "$src", "$src",
                         LdDAG,
                         (vselect MaskRC:$mask, LdDAG, _.RC:$src0),
                         vselect, "$src0 = $dst">,
                         EVEX, Sched<[sched.Folded]>;

  defm rmb : AVX512_maskable_common<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _Src.ScalarMemOp:$src),
                         (ins _.RC:$src0, MaskRC:$mask, _Src.ScalarMemOp:$src),
                         (ins MaskRC:$mask, _Src.ScalarMemOp:$src),
                         OpcodeStr,
                         "${src}"##Broadcast, "${src}"##Broadcast,
                         (_.VT (OpNode (_Src.VT
                                  (X86VBroadcast (_Src.ScalarLdFrag addr:$src)))
                            )),
                         (vselect MaskRC:$mask,
                                  (_.VT
                                   (OpNode
                                    (_Src.VT
                                     (X86VBroadcast
                                      (_Src.ScalarLdFrag addr:$src))))),
                                  _.RC:$src0),
                         vselect, "$src0 = $dst">,
                         EVEX, EVEX_B, Sched<[sched.Folded]>;
}
// Conversion with SAE - suppress all exceptions
// Adds a register-only rrb form whose assembly string carries "{sae}".
multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                              X86VectorVTInfo _Src, SDNode OpNodeSAE,
                              X86FoldableSchedWrite sched> {
  defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _Src.RC:$src), OpcodeStr,
                        "{sae}, $src", "$src, {sae}",
                        (_.VT (OpNodeSAE (_Src.VT _Src.RC:$src)))>,
                        EVEX, EVEX_B, Sched<[sched]>;
}

// Conversion with rounding control (RC)
// Adds a register-only rrb form that takes an extra AVX512RC:$rc operand,
// matched as an i32 timm by OpNodeRnd.
multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                             X86VectorVTInfo _Src, SDNode OpNodeRnd,
                             X86FoldableSchedWrite sched> {
  defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr,
                        "$rc, $src", "$src, $rc",
                        (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 timm:$rc)))>,
                        EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
}

// Similar to avx512_vcvt_fp, but uses an extload for the memory form.
// Forwards every argument to avx512_vcvt_fp and overrides its LdDAG with the
// PatFrag named "extload" # _Src.VTName applied to addr:$src.
multiclass avx512_vcvt_fpextend<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                X86VectorVTInfo _Src, SDNode OpNode,
                                X86FoldableSchedWrite sched,
                                string Broadcast = _.BroadcastStr,
                                string Alias = "", X86MemOperand MemOp = _Src.MemOp,
                                RegisterClass MaskRC = _.KRCWM>
  : avx512_vcvt_fp<opc, OpcodeStr, _, _Src, OpNode, sched, Broadcast, Alias,
                   MemOp, MaskRC,
                   (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src))>;

// Extend Float to Double
// Z (512-bit) also gets the SAE form. Z128 uses X86vfpext with an explicit
// "{1to2}" broadcast string and an f64mem memory operand.
multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, v8f64_info, v8f32x_info,
                                  fpextend, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f32x_info,
                                X86vfpextSAE, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, v2f64x_info, v4f32x_info,
                               X86vfpext, sched.XMM, "{1to2}", "", f64mem>, EVEX_V128;
    defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v4f32x_info, fpextend,
                                     sched.YMM>, EVEX_V256;
  }
}

// Truncate Double to Float
// Z (512-bit) also gets the rounding-control form. Z128 is instantiated with
// null_frag; its selection patterns are written out separately. Z128/Z256
// carry "{x}"/"{y}" mnemonic suffixes on their memory form and explicit
// broadcast strings.
multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info, X86vfpround, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8f64_info,
                               X86vfproundRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2f64x_info,
                               null_frag, sched.XMM, "{1to2}", "{x}", f128mem, VK2WM>,
                               EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info, X86vfpround,
                               sched.YMM, "{1to4}", "{y}">, EVEX_V256;
  }

  // "x"-suffixed aliases for the Z128 register and broadcast forms (plain,
  // {mask}, {mask}{z}); each is tagged "att".
  def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0,
                  "att">;
  def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;

  // "y"-suffixed aliases for the Z256 register and broadcast forms.
  def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
}

// Both packed conversions share opcode 0x5A; they differ in prefix, VEX_W and
// compressed-displacement tuple.
defm VCVTPD2PS : avx512_cvtpd2ps<0x5A, "vcvtpd2ps", SchedWriteCvtPD2PS>,
                                  VEX_W, PD, EVEX_CD8<64, CD8VF>;
defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd", SchedWriteCvtPS2PD>,
                                  PS, EVEX_CD8<32, CD8VH>;

// Select the 512-bit VCVTPD2PSZ forms for generic fpround: unmasked,
// merge-masked ($src0 pass-through) and zero-masked (ImmAllZerosV), each for
// a register, a full-vector load and a broadcast f64 load.
let Predicates = [HasAVX512] in {
  def : Pat<(v8f32 (fpround (v8f64 VR512:$src))),
            (VCVTPD2PSZrr VR512:$src)>;
  def : Pat<(vselect VK8WM:$mask, (v8f32 (fpround (v8f64 VR512:$src))),
                     VR256X:$src0),
            (VCVTPD2PSZrrk VR256X:$src0, VK8WM:$mask, VR512:$src)>;
  def : Pat<(vselect VK8WM:$mask, (v8f32 (fpround (v8f64 VR512:$src))),
                     v8f32x_info.ImmAllZerosV),
            (VCVTPD2PSZrrkz VK8WM:$mask, VR512:$src)>;

  def : Pat<(v8f32 (fpround (loadv8f64 addr:$src))),
            (VCVTPD2PSZrm addr:$src)>;
  def : Pat<(vselect VK8WM:$mask, (v8f32 (fpround (loadv8f64 addr:$src))),
                     VR256X:$src0),
            (VCVTPD2PSZrmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
  def : Pat<(vselect VK8WM:$mask, (v8f32 (fpround (loadv8f64 addr:$src))),
                     v8f32x_info.ImmAllZerosV),
            (VCVTPD2PSZrmkz VK8WM:$mask, addr:$src)>;

  def : Pat<(v8f32 (fpround (v8f64 (X86VBroadcast (loadf64 addr:$src))))),
            (VCVTPD2PSZrmb addr:$src)>;
  def : Pat<(vselect VK8WM:$mask,
                     (fpround (v8f64 (X86VBroadcast (loadf64 addr:$src)))),
                     (v8f32 VR256X:$src0)),
            (VCVTPD2PSZrmbk VR256X:$src0, VK8WM:$mask, addr:$src)>;
  def : Pat<(vselect VK8WM:$mask,
                     (fpround (v8f64 (X86VBroadcast (loadf64 addr:$src)))),
                     v8f32x_info.ImmAllZerosV),
            (VCVTPD2PSZrmbkz VK8WM:$mask, addr:$src)>;
}

let Predicates = [HasVLX] in {
  // Same pattern set for the 256-bit source (VCVTPD2PSZ256 forms).
  def : Pat<(v4f32 (fpround (v4f64 VR256X:$src))),
            (VCVTPD2PSZ256rr VR256X:$src)>;
  def : Pat<(vselect VK4WM:$mask, (v4f32 (fpround (v4f64 VR256X:$src))),
                     VR128X:$src0),
            (VCVTPD2PSZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
  def : Pat<(vselect VK4WM:$mask, (v4f32
                     (fpround (v4f64 VR256X:$src))),
                     v4f32x_info.ImmAllZerosV),
            (VCVTPD2PSZ256rrkz VK4WM:$mask, VR256X:$src)>;

  def : Pat<(v4f32 (fpround (loadv4f64 addr:$src))),
            (VCVTPD2PSZ256rm addr:$src)>;
  def : Pat<(vselect VK4WM:$mask, (v4f32 (fpround (loadv4f64 addr:$src))),
                     VR128X:$src0),
            (VCVTPD2PSZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(vselect VK4WM:$mask, (v4f32 (fpround (loadv4f64 addr:$src))),
                     v4f32x_info.ImmAllZerosV),
            (VCVTPD2PSZ256rmkz VK4WM:$mask, addr:$src)>;

  def : Pat<(v4f32 (fpround (v4f64 (X86VBroadcast (loadf64 addr:$src))))),
            (VCVTPD2PSZ256rmb addr:$src)>;
  def : Pat<(vselect VK4WM:$mask,
                     (v4f32 (fpround (v4f64 (X86VBroadcast (loadf64 addr:$src))))),
                     VR128X:$src0),
            (VCVTPD2PSZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(vselect VK4WM:$mask,
                     (v4f32 (fpround (v4f64 (X86VBroadcast (loadf64 addr:$src))))),
                     v4f32x_info.ImmAllZerosV),
            (VCVTPD2PSZ256rmbkz VK4WM:$mask, addr:$src)>;

  // Special patterns to allow use of X86vmfpround for masking. Instruction
  // patterns have been disabled with null_frag.
  // Register source.
  def : Pat<(X86vfpround (v2f64 VR128X:$src)),
            (VCVTPD2PSZ128rr VR128X:$src)>;
  def : Pat<(X86vmfpround (v2f64 VR128X:$src), (v4f32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTPD2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86vmfpround (v2f64 VR128X:$src), v4f32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTPD2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;

  // Full v2f64 load source.
  def : Pat<(X86vfpround (loadv2f64 addr:$src)),
            (VCVTPD2PSZ128rm addr:$src)>;
  def : Pat<(X86vmfpround (loadv2f64 addr:$src), (v4f32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTPD2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86vmfpround (loadv2f64 addr:$src), v4f32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTPD2PSZ128rmkz VK2WM:$mask, addr:$src)>;

  // Broadcast f64 load source.
  def : Pat<(X86vfpround (v2f64 (X86VBroadcast (loadf64 addr:$src)))),
            (VCVTPD2PSZ128rmb addr:$src)>;
  def : Pat<(X86vmfpround (v2f64 (X86VBroadcast (loadf64 addr:$src))),
                          (v4f32 VR128X:$src0), VK2WM:$mask),
            (VCVTPD2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86vmfpround (v2f64 (X86VBroadcast (loadf64 addr:$src))),
                          v4f32x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTPD2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
}

// Convert Signed/Unsigned Doubleword to Double
// Z128 takes its own node (OpNode128), an i64mem operand and a load pattern
// built from a scalar i64 load bitcast to v4i32.
multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNode128, X86SchedWriteWidths sched> {
  // No rounding in this op
  let Predicates = [HasAVX512] in
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode,
                            sched.ZMM>, EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
                               OpNode128, sched.XMM, "{1to2}", "", i64mem, VK2WM,
                               (v2f64 (OpNode128 (bc_v4i32
                                (v2i64
                                 (scalar_to_vector (loadi64 addr:$src))))))>,
                               EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode,
                               sched.YMM>,
                               EVEX_V256;
  }
}

// Convert Signed/Unsigned Doubleword to Float
// Z (512-bit) also gets the rounding-control form built from OpNodeRnd.
multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode,
                               sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}

// Convert Float to Signed/Unsigned Doubleword with truncation
// Z (512-bit) also gets the SAE form built from OpNodeSAE.
multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            SDNode OpNodeSAE, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
                                OpNodeSAE, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
                               sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}

// Convert Float to Signed/Unsigned Doubleword
// Z (512-bit) also gets the rounding-control form built from OpNodeRnd.
multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
                               sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}

// Convert Double to Signed/Unsigned Doubleword with truncation
// Z (512-bit) also gets the SAE form. Z128 is instantiated with null_frag.
multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            SDNode OpNodeSAE, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
                                OpNodeSAE, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    // we need "x"/"y" suffixes in order to distinguish between 128 and 256
    // memory forms of these instructions in Asm Parser. They have the same
    // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
    // due to the same reason.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
                               null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
                               VK2WM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
                               sched.YMM, "{1to4}", "{y}">, EVEX_V256;
  }

  // "x"-suffixed aliases for the Z128 register and broadcast forms.
  def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
                  VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
                  f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;

  // "y"-suffixed aliases for the Z256 register and broadcast forms.
  def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
                  VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
                  f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
}

// Convert Double to Signed/Unsigned Doubleword
// Z (512-bit) also gets the rounding-control form. Z128 is instantiated with
// null_frag.
multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    // we need "x"/"y" suffixes in order to distinguish between 128 and 256
    // memory forms of these instructions in Asm Parser. They have the same
    // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
    // due to the same reason.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
                               null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
                               VK2WM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
                               sched.YMM, "{1to4}", "{y}">, EVEX_V256;
  }

  // "x"-suffixed aliases for the Z128 register and broadcast forms.
  def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
                  f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;

  // "y"-suffixed aliases for the Z256 register and broadcast forms.
  def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME #
                  "Z256rrkz") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
                  f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
}

// Convert Double to Signed/Unsigned Quadword
// Requires HasDQI; the 128/256-bit forms additionally require HasVLX.
multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
                               sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}

// Convert Double to Signed/Unsigned Quadword with truncation
// Note: despite its name, OpNodeRnd is passed to avx512_vcvt_fp_sae here.
multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
                                OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
                               sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc,
                               OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}

// Convert Signed/Unsigned Quadword to Double
// The 128/256-bit forms are marked NotEVEX2VEXConvertible.
multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode,
                               sched.XMM>, EVEX_V128, NotEVEX2VEXConvertible;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode,
                               sched.YMM>, EVEX_V256, NotEVEX2VEXConvertible;
  }
}

// Convert Float to Signed/Unsigned Quadword
multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    // Explicitly specified broadcast string, since we take only 2 elements
    // from v4f32x_info source
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
                               sched.XMM, "{1to2}", "", f64mem, VK2WM,
                               (v2i64 (OpNode (bc_v4f32
                                (v2f64
                                 (scalar_to_vector (loadf64 addr:$src))))))>,
                               EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}

// Convert Float to Signed/Unsigned Quadword with truncation
// Note: despite its name, OpNodeRnd is passed to avx512_vcvt_fp_sae here.
multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates =
      [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
                                OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    // Explicitly specified broadcast string, since we take only 2 elements
    // from v4f32x_info source
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
                               sched.XMM, "{1to2}", "", f64mem, VK2WM,
                               (v2i64 (OpNode (bc_v4f32
                                (v2f64
                                 (scalar_to_vector (loadf64 addr:$src))))))>,
                               EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}

// Convert Signed/Unsigned Quadword to Float
multiclass avx512_cvtqq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i64_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8i64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    // we need "x"/"y" suffixes in order to distinguish between 128 and 256
    // memory forms of these instructions in Asm Parser. They have the same
    // dest type - 'v4f32x_info'. We also specify the broadcast string explicitly
    // due to the same reason.
    // Z128 is instantiated with null_frag; both forms are marked
    // NotEVEX2VEXConvertible.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2i64x_info, null_frag,
                               sched.XMM, "{1to2}", "{x}", i128mem, VK2WM>,
                               EVEX_V128, NotEVEX2VEXConvertible;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i64x_info, OpNode,
                               sched.YMM, "{1to4}", "{y}">, EVEX_V256,
                               NotEVEX2VEXConvertible;
  }

  // "x"-suffixed aliases for the Z128 register and broadcast forms.
  def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
                  VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
                  i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
                  VK2WM:$mask, i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                  VK2WM:$mask, i64mem:$src), 0, "att">;

  // "y"-suffixed aliases for the Z256 register and broadcast forms.
  def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
                  VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}}|"
                  "$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def :
      InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
                  i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
                  VK4WM:$mask, i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
                  VK4WM:$mask, i64mem:$src), 0, "att">;
}

// Instantiations of the packed integer<->FP conversions. Each passes the
// opcode, mnemonic, the plain node, the rounding or SAE node (or the 128-bit
// node for avx512_cvtdq2pd) and the scheduling class, then applies the
// prefix, VEX_W and EVEX_CD8 attributes.
defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", sint_to_fp, X86VSintToFP,
                                 SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>;

defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", sint_to_fp,
                                 X86VSintToFpRnd, SchedWriteCvtDQ2PS>,
                                 PS, EVEX_CD8<32, CD8VF>;

defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86cvttp2si,
                                 X86cvttp2siSAE, SchedWriteCvtPS2DQ>,
                                 XS, EVEX_CD8<32, CD8VF>;

defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86cvttp2si,
                                 X86cvttp2siSAE, SchedWriteCvtPD2DQ>,
                                 PD, VEX_W, EVEX_CD8<64, CD8VF>;

defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86cvttp2ui,
                                 X86cvttp2uiSAE, SchedWriteCvtPS2DQ>, PS,
                                 EVEX_CD8<32, CD8VF>;

defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86cvttp2ui,
                                 X86cvttp2uiSAE, SchedWriteCvtPD2DQ>,
                                 PS, VEX_W, EVEX_CD8<64, CD8VF>;

defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", uint_to_fp,
                                  X86VUintToFP, SchedWriteCvtDQ2PD>, XS,
                                  EVEX_CD8<32, CD8VH>;

defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", uint_to_fp,
                                 X86VUintToFpRnd, SchedWriteCvtDQ2PS>, XD,
                                 EVEX_CD8<32, CD8VF>;

defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int,
                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
                                 EVEX_CD8<32, CD8VF>;

defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int,
                                 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, XD,
                                 VEX_W, EVEX_CD8<64, CD8VF>;

defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt,
                                 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>,
                                 PS, EVEX_CD8<32, CD8VF>;

defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt,
                                 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
                                 PS, EVEX_CD8<64, CD8VF>;

defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int,
                                 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, VEX_W,
                                 PD, EVEX_CD8<64, CD8VF>;

defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int,
                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
                                 EVEX_CD8<32, CD8VH>;

defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt,
                                 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
                                 PD, EVEX_CD8<64, CD8VF>;

defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt,
                                 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, PD,
                                 EVEX_CD8<32, CD8VH>;

defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86cvttp2si,
                                 X86cvttp2siSAE, SchedWriteCvtPD2DQ>, VEX_W,
                                 PD, EVEX_CD8<64, CD8VF>;

defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86cvttp2si,
                                 X86cvttp2siSAE, SchedWriteCvtPS2DQ>, PD,
                                 EVEX_CD8<32, CD8VH>;

defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86cvttp2ui,
                                 X86cvttp2uiSAE, SchedWriteCvtPD2DQ>, VEX_W,
                                 PD, EVEX_CD8<64, CD8VF>;

defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86cvttp2ui,
                                 X86cvttp2uiSAE, SchedWriteCvtPS2DQ>, PD,
                                 EVEX_CD8<32, CD8VH>;

defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", sint_to_fp,
                            X86VSintToFpRnd, SchedWriteCvtDQ2PD>, VEX_W, XS,
                            EVEX_CD8<64, CD8VF>;

defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", uint_to_fp,
                            X86VUintToFpRnd, SchedWriteCvtDQ2PD>, VEX_W, XS,
                            EVEX_CD8<64, CD8VF>;

defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", sint_to_fp,
                            X86VSintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, PS,
                            EVEX_CD8<64, CD8VF>;

defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", uint_to_fp,
                            X86VUintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, XD,
                            EVEX_CD8<64, CD8VF>;

let Predicates = [HasVLX] in {
  // Special patterns to allow use of X86mcvtp2Int for masking. Instruction
  // patterns have been disabled with null_frag.
  // Register source.
  def : Pat<(v4i32 (X86cvtp2Int (v2f64 VR128X:$src))),
            (VCVTPD2DQZ128rr VR128X:$src)>;
  def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;

  // Full v2f64 load source.
  def : Pat<(v4i32 (X86cvtp2Int (loadv2f64 addr:$src))),
            (VCVTPD2DQZ128rm addr:$src)>;
  def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;

  // Broadcast f64 load source.
  def : Pat<(v4i32 (X86cvtp2Int (v2f64 (X86VBroadcast (loadf64 addr:$src))))),
            (VCVTPD2DQZ128rmb addr:$src)>;
  def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcast (loadf64 addr:$src))),
                          (v4i32 VR128X:$src0), VK2WM:$mask),
            (VCVTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcast (loadf64 addr:$src))),
                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;

  // Special patterns to allow use of X86mcvttp2si for masking. Instruction
  // patterns have been disabled with null_frag.
  // Register source: unmasked, merge-masked ($src0) and zero-masked forms.
  def : Pat<(v4i32 (X86cvttp2si (v2f64 VR128X:$src))),
            (VCVTTPD2DQZ128rr VR128X:$src)>;
  def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;

  // Full v2f64 load source.
  def : Pat<(v4i32 (X86cvttp2si (loadv2f64 addr:$src))),
            (VCVTTPD2DQZ128rm addr:$src)>;
  def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;

  // Broadcast f64 load source.
  def : Pat<(v4i32 (X86cvttp2si (v2f64 (X86VBroadcast (loadf64 addr:$src))))),
            (VCVTTPD2DQZ128rmb addr:$src)>;
  def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcast (loadf64 addr:$src))),
                          (v4i32 VR128X:$src0), VK2WM:$mask),
            (VCVTTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcast (loadf64 addr:$src))),
                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;

  // Special patterns to allow use of X86mcvtp2UInt for masking. Instruction
  // patterns have been disabled with null_frag.
8237 def : Pat<(v4i32 (X86cvtp2UInt (v2f64 VR128X:$src))), 8238 (VCVTPD2UDQZ128rr VR128X:$src)>; 8239 def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), (v4i32 VR128X:$src0), 8240 VK2WM:$mask), 8241 (VCVTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>; 8242 def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV, 8243 VK2WM:$mask), 8244 (VCVTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>; 8245 8246 def : Pat<(v4i32 (X86cvtp2UInt (loadv2f64 addr:$src))), 8247 (VCVTPD2UDQZ128rm addr:$src)>; 8248 def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), (v4i32 VR128X:$src0), 8249 VK2WM:$mask), 8250 (VCVTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8251 def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV, 8252 VK2WM:$mask), 8253 (VCVTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>; 8254 8255 def : Pat<(v4i32 (X86cvtp2UInt (v2f64 (X86VBroadcast (loadf64 addr:$src))))), 8256 (VCVTPD2UDQZ128rmb addr:$src)>; 8257 def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcast (loadf64 addr:$src))), 8258 (v4i32 VR128X:$src0), VK2WM:$mask), 8259 (VCVTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8260 def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcast (loadf64 addr:$src))), 8261 v4i32x_info.ImmAllZerosV, VK2WM:$mask), 8262 (VCVTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>; 8263 8264 // Special patterns to allow use of X86mcvttp2ui for masking. Instruction 8265 // patterns have been disabled with null_frag. 
8266 def : Pat<(v4i32 (X86cvttp2ui (v2f64 VR128X:$src))), 8267 (VCVTTPD2UDQZ128rr VR128X:$src)>; 8268 def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), (v4i32 VR128X:$src0), 8269 VK2WM:$mask), 8270 (VCVTTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>; 8271 def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV, 8272 VK2WM:$mask), 8273 (VCVTTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>; 8274 8275 def : Pat<(v4i32 (X86cvttp2ui (loadv2f64 addr:$src))), 8276 (VCVTTPD2UDQZ128rm addr:$src)>; 8277 def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), (v4i32 VR128X:$src0), 8278 VK2WM:$mask), 8279 (VCVTTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8280 def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV, 8281 VK2WM:$mask), 8282 (VCVTTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>; 8283 8284 def : Pat<(v4i32 (X86cvttp2ui (v2f64 (X86VBroadcast (loadf64 addr:$src))))), 8285 (VCVTTPD2UDQZ128rmb addr:$src)>; 8286 def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcast (loadf64 addr:$src))), 8287 (v4i32 VR128X:$src0), VK2WM:$mask), 8288 (VCVTTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8289 def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcast (loadf64 addr:$src))), 8290 v4i32x_info.ImmAllZerosV, VK2WM:$mask), 8291 (VCVTTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>; 8292} 8293 8294let Predicates = [HasDQI, HasVLX] in { 8295 def : Pat<(v2i64 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))), 8296 (VCVTPS2QQZ128rm addr:$src)>; 8297 def : Pat<(v2i64 (vselect VK2WM:$mask, 8298 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), 8299 VR128X:$src0)), 8300 (VCVTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8301 def : Pat<(v2i64 (vselect VK2WM:$mask, 8302 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), 8303 v2i64x_info.ImmAllZerosV)), 8304 (VCVTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>; 8305 8306 def : Pat<(v2i64 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))), 8307 (VCVTPS2UQQZ128rm addr:$src)>; 8308 def : Pat<(v2i64 (vselect 
VK2WM:$mask, 8309 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), 8310 VR128X:$src0)), 8311 (VCVTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8312 def : Pat<(v2i64 (vselect VK2WM:$mask, 8313 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), 8314 v2i64x_info.ImmAllZerosV)), 8315 (VCVTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>; 8316 8317 def : Pat<(v2i64 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))), 8318 (VCVTTPS2QQZ128rm addr:$src)>; 8319 def : Pat<(v2i64 (vselect VK2WM:$mask, 8320 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), 8321 VR128X:$src0)), 8322 (VCVTTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8323 def : Pat<(v2i64 (vselect VK2WM:$mask, 8324 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), 8325 v2i64x_info.ImmAllZerosV)), 8326 (VCVTTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>; 8327 8328 def : Pat<(v2i64 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))), 8329 (VCVTTPS2UQQZ128rm addr:$src)>; 8330 def : Pat<(v2i64 (vselect VK2WM:$mask, 8331 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), 8332 VR128X:$src0)), 8333 (VCVTTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8334 def : Pat<(v2i64 (vselect VK2WM:$mask, 8335 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), 8336 v2i64x_info.ImmAllZerosV)), 8337 (VCVTTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>; 8338} 8339 8340let Predicates = [HasAVX512, NoVLX] in { 8341def : Pat<(v8i32 (X86cvttp2ui (v8f32 VR256X:$src1))), 8342 (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr 8343 (v16f32 (INSERT_SUBREG (IMPLICIT_DEF), 8344 VR256X:$src1, sub_ymm)))), sub_ymm)>; 8345 8346def : Pat<(v4i32 (X86cvttp2ui (v4f32 VR128X:$src1))), 8347 (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr 8348 (v16f32 (INSERT_SUBREG (IMPLICIT_DEF), 8349 VR128X:$src1, sub_xmm)))), sub_xmm)>; 8350 8351def : Pat<(v4i32 (X86cvttp2ui (v4f64 VR256X:$src1))), 8352 (EXTRACT_SUBREG (v8i32 (VCVTTPD2UDQZrr 8353 (v8f64 (INSERT_SUBREG (IMPLICIT_DEF), 8354 VR256X:$src1, sub_ymm)))), 
sub_xmm)>; 8355 8356def : Pat<(v8f32 (uint_to_fp (v8i32 VR256X:$src1))), 8357 (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr 8358 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), 8359 VR256X:$src1, sub_ymm)))), sub_ymm)>; 8360 8361def : Pat<(v4f32 (uint_to_fp (v4i32 VR128X:$src1))), 8362 (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr 8363 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), 8364 VR128X:$src1, sub_xmm)))), sub_xmm)>; 8365 8366def : Pat<(v4f64 (uint_to_fp (v4i32 VR128X:$src1))), 8367 (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr 8368 (v8i32 (INSERT_SUBREG (IMPLICIT_DEF), 8369 VR128X:$src1, sub_xmm)))), sub_ymm)>; 8370 8371def : Pat<(v2f64 (X86VUintToFP (v4i32 VR128X:$src1))), 8372 (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr 8373 (v8i32 (INSERT_SUBREG (IMPLICIT_DEF), 8374 VR128X:$src1, sub_xmm)))), sub_xmm)>; 8375} 8376 8377let Predicates = [HasVLX] in { 8378 def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))), 8379 (VCVTDQ2PDZ128rm addr:$src)>; 8380 def : Pat<(v2f64 (vselect VK2WM:$mask, 8381 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))), 8382 VR128X:$src0)), 8383 (VCVTDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8384 def : Pat<(v2f64 (vselect VK2WM:$mask, 8385 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))), 8386 v2f64x_info.ImmAllZerosV)), 8387 (VCVTDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>; 8388 8389 def : Pat<(v2f64 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))), 8390 (VCVTUDQ2PDZ128rm addr:$src)>; 8391 def : Pat<(v2f64 (vselect VK2WM:$mask, 8392 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))), 8393 VR128X:$src0)), 8394 (VCVTUDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8395 def : Pat<(v2f64 (vselect VK2WM:$mask, 8396 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))), 8397 v2f64x_info.ImmAllZerosV)), 8398 (VCVTUDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>; 8399} 8400 8401let Predicates = [HasDQI, HasVLX] in { 8402 // Special patterns to allow use of X86VMSintToFP for masking. 
Instruction 8403 // patterns have been disabled with null_frag. 8404 def : Pat<(v4f32 (X86VSintToFP (v2i64 VR128X:$src))), 8405 (VCVTQQ2PSZ128rr VR128X:$src)>; 8406 def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), (v4f32 VR128X:$src0), 8407 VK2WM:$mask), 8408 (VCVTQQ2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>; 8409 def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), v4f32x_info.ImmAllZerosV, 8410 VK2WM:$mask), 8411 (VCVTQQ2PSZ128rrkz VK2WM:$mask, VR128X:$src)>; 8412 8413 def : Pat<(v4f32 (X86VSintToFP (loadv2i64 addr:$src))), 8414 (VCVTQQ2PSZ128rm addr:$src)>; 8415 def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), (v4f32 VR128X:$src0), 8416 VK2WM:$mask), 8417 (VCVTQQ2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8418 def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), v4f32x_info.ImmAllZerosV, 8419 VK2WM:$mask), 8420 (VCVTQQ2PSZ128rmkz VK2WM:$mask, addr:$src)>; 8421 8422 def : Pat<(v4f32 (X86VSintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))))), 8423 (VCVTQQ2PSZ128rmb addr:$src)>; 8424 def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))), 8425 (v4f32 VR128X:$src0), VK2WM:$mask), 8426 (VCVTQQ2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8427 def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))), 8428 v4f32x_info.ImmAllZerosV, VK2WM:$mask), 8429 (VCVTQQ2PSZ128rmbkz VK2WM:$mask, addr:$src)>; 8430 8431 // Special patterns to allow use of X86VMUintToFP for masking. Instruction 8432 // patterns have been disabled with null_frag. 
8433 def : Pat<(v4f32 (X86VUintToFP (v2i64 VR128X:$src))), 8434 (VCVTUQQ2PSZ128rr VR128X:$src)>; 8435 def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), (v4f32 VR128X:$src0), 8436 VK2WM:$mask), 8437 (VCVTUQQ2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>; 8438 def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), v4f32x_info.ImmAllZerosV, 8439 VK2WM:$mask), 8440 (VCVTUQQ2PSZ128rrkz VK2WM:$mask, VR128X:$src)>; 8441 8442 def : Pat<(v4f32 (X86VUintToFP (loadv2i64 addr:$src))), 8443 (VCVTUQQ2PSZ128rm addr:$src)>; 8444 def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), (v4f32 VR128X:$src0), 8445 VK2WM:$mask), 8446 (VCVTUQQ2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8447 def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), v4f32x_info.ImmAllZerosV, 8448 VK2WM:$mask), 8449 (VCVTUQQ2PSZ128rmkz VK2WM:$mask, addr:$src)>; 8450 8451 def : Pat<(v4f32 (X86VUintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))))), 8452 (VCVTUQQ2PSZ128rmb addr:$src)>; 8453 def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))), 8454 (v4f32 VR128X:$src0), VK2WM:$mask), 8455 (VCVTUQQ2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8456 def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))), 8457 v4f32x_info.ImmAllZerosV, VK2WM:$mask), 8458 (VCVTUQQ2PSZ128rmbkz VK2WM:$mask, addr:$src)>; 8459} 8460 8461let Predicates = [HasDQI, NoVLX] in { 8462def : Pat<(v2i64 (X86cvttp2si (v2f64 VR128X:$src1))), 8463 (EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr 8464 (v8f64 (INSERT_SUBREG (IMPLICIT_DEF), 8465 VR128X:$src1, sub_xmm)))), sub_xmm)>; 8466 8467def : Pat<(v4i64 (X86cvttp2si (v4f32 VR128X:$src1))), 8468 (EXTRACT_SUBREG (v8i64 (VCVTTPS2QQZrr 8469 (v8f32 (INSERT_SUBREG (IMPLICIT_DEF), 8470 VR128X:$src1, sub_xmm)))), sub_ymm)>; 8471 8472def : Pat<(v4i64 (X86cvttp2si (v4f64 VR256X:$src1))), 8473 (EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr 8474 (v8f64 (INSERT_SUBREG (IMPLICIT_DEF), 8475 VR256X:$src1, sub_ymm)))), sub_ymm)>; 8476 8477def : Pat<(v2i64 (X86cvttp2ui (v2f64 VR128X:$src1))), 8478 (EXTRACT_SUBREG 
(v8i64 (VCVTTPD2UQQZrr 8479 (v8f64 (INSERT_SUBREG (IMPLICIT_DEF), 8480 VR128X:$src1, sub_xmm)))), sub_xmm)>; 8481 8482def : Pat<(v4i64 (X86cvttp2ui (v4f32 VR128X:$src1))), 8483 (EXTRACT_SUBREG (v8i64 (VCVTTPS2UQQZrr 8484 (v8f32 (INSERT_SUBREG (IMPLICIT_DEF), 8485 VR128X:$src1, sub_xmm)))), sub_ymm)>; 8486 8487def : Pat<(v4i64 (X86cvttp2ui (v4f64 VR256X:$src1))), 8488 (EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr 8489 (v8f64 (INSERT_SUBREG (IMPLICIT_DEF), 8490 VR256X:$src1, sub_ymm)))), sub_ymm)>; 8491 8492def : Pat<(v4f32 (sint_to_fp (v4i64 VR256X:$src1))), 8493 (EXTRACT_SUBREG (v8f32 (VCVTQQ2PSZrr 8494 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), 8495 VR256X:$src1, sub_ymm)))), sub_xmm)>; 8496 8497def : Pat<(v2f64 (sint_to_fp (v2i64 VR128X:$src1))), 8498 (EXTRACT_SUBREG (v8f64 (VCVTQQ2PDZrr 8499 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), 8500 VR128X:$src1, sub_xmm)))), sub_xmm)>; 8501 8502def : Pat<(v4f64 (sint_to_fp (v4i64 VR256X:$src1))), 8503 (EXTRACT_SUBREG (v8f64 (VCVTQQ2PDZrr 8504 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), 8505 VR256X:$src1, sub_ymm)))), sub_ymm)>; 8506 8507def : Pat<(v4f32 (uint_to_fp (v4i64 VR256X:$src1))), 8508 (EXTRACT_SUBREG (v8f32 (VCVTUQQ2PSZrr 8509 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), 8510 VR256X:$src1, sub_ymm)))), sub_xmm)>; 8511 8512def : Pat<(v2f64 (uint_to_fp (v2i64 VR128X:$src1))), 8513 (EXTRACT_SUBREG (v8f64 (VCVTUQQ2PDZrr 8514 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), 8515 VR128X:$src1, sub_xmm)))), sub_xmm)>; 8516 8517def : Pat<(v4f64 (uint_to_fp (v4i64 VR256X:$src1))), 8518 (EXTRACT_SUBREG (v8f64 (VCVTUQQ2PDZrr 8519 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), 8520 VR256X:$src1, sub_ymm)))), sub_ymm)>; 8521} 8522 8523//===----------------------------------------------------------------------===// 8524// Half precision conversion instructions 8525//===----------------------------------------------------------------------===// 8526 8527multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src, 8528 X86MemOperand x86memop, PatFrag ld_frag, 8529 
X86FoldableSchedWrite sched> { 8530 defm rr : AVX512_maskable<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst), 8531 (ins _src.RC:$src), "vcvtph2ps", "$src", "$src", 8532 (X86cvtph2ps (_src.VT _src.RC:$src))>, 8533 T8PD, Sched<[sched]>; 8534 defm rm : AVX512_maskable<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst), 8535 (ins x86memop:$src), "vcvtph2ps", "$src", "$src", 8536 (X86cvtph2ps (_src.VT 8537 (ld_frag addr:$src)))>, 8538 T8PD, Sched<[sched.Folded]>; 8539} 8540 8541multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src, 8542 X86FoldableSchedWrite sched> { 8543 defm rrb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst), 8544 (ins _src.RC:$src), "vcvtph2ps", 8545 "{sae}, $src", "$src, {sae}", 8546 (X86cvtph2psSAE (_src.VT _src.RC:$src))>, 8547 T8PD, EVEX_B, Sched<[sched]>; 8548} 8549 8550let Predicates = [HasAVX512] in 8551 defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem, load, 8552 WriteCvtPH2PSZ>, 8553 avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSZ>, 8554 EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>; 8555 8556let Predicates = [HasVLX] in { 8557 defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem, 8558 load, WriteCvtPH2PSY>, EVEX, EVEX_V256, 8559 EVEX_CD8<32, CD8VH>; 8560 defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem, 8561 load, WriteCvtPH2PS>, EVEX, EVEX_V128, 8562 EVEX_CD8<32, CD8VH>; 8563 8564 // Pattern match vcvtph2ps of a scalar i64 load. 
8565 def : Pat<(v4f32 (X86cvtph2ps (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))), 8566 (VCVTPH2PSZ128rm addr:$src)>; 8567 def : Pat<(v4f32 (X86cvtph2ps (v8i16 (bitconvert 8568 (v2i64 (scalar_to_vector (loadi64 addr:$src))))))), 8569 (VCVTPH2PSZ128rm addr:$src)>; 8570} 8571 8572multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src, 8573 X86MemOperand x86memop, SchedWrite RR, SchedWrite MR> { 8574let ExeDomain = GenericDomain in { 8575 def rr : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst), 8576 (ins _src.RC:$src1, i32u8imm:$src2), 8577 "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", 8578 [(set _dest.RC:$dst, 8579 (X86cvtps2ph (_src.VT _src.RC:$src1), (i32 imm:$src2)))]>, 8580 Sched<[RR]>; 8581 let Constraints = "$src0 = $dst" in 8582 def rrk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst), 8583 (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2), 8584 "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", 8585 [(set _dest.RC:$dst, 8586 (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 imm:$src2), 8587 _dest.RC:$src0, _src.KRCWM:$mask))]>, 8588 Sched<[RR]>, EVEX_K; 8589 def rrkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst), 8590 (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2), 8591 "vcvtps2ph\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}", 8592 [(set _dest.RC:$dst, 8593 (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 imm:$src2), 8594 _dest.ImmAllZerosV, _src.KRCWM:$mask))]>, 8595 Sched<[RR]>, EVEX_KZ; 8596 let hasSideEffects = 0, mayStore = 1 in { 8597 def mr : AVX512AIi8<0x1D, MRMDestMem, (outs), 8598 (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2), 8599 "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, 8600 Sched<[MR]>; 8601 def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs), 8602 (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2), 8603 "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", []>, 8604 EVEX_K, Sched<[MR]>, 
NotMemoryFoldable;
  }
}
}

// Register-to-register vcvtps2ph form that carries the {sae} operand in its
// assembly string. It is declared through AVX512_maskable_in_asm with an empty
// pattern list, so it is never produced by instruction selection; EVEX_B is
// set on the encoding.
multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                               SchedWrite Sched> {
  let hasSideEffects = 0 in
  defm rrb : AVX512_maskable_in_asm<0x1D, MRMDestReg, _dest,
                   (outs _dest.RC:$dst),
                   (ins _src.RC:$src1, i32u8imm:$src2),
                   "vcvtps2ph", "$src2, {sae}, $src1", "$src1, {sae}, $src2", []>,
                   EVEX_B, AVX512AIi8Base, Sched<[Sched]>;
}

// 512-bit vcvtps2ph: the instruction and the store pattern that selects its
// memory-destination form only need the base AVX512 predicate.
let Predicates = [HasAVX512] in {
  defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem,
                                    WriteCvtPS2PHZ, WriteCvtPS2PHZSt>,
                    avx512_cvtps2ph_sae<v16i16x_info, v16f32_info, WriteCvtPS2PHZ>,
                                        EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;

  def : Pat<(store (v16i16 (X86cvtps2ph VR512:$src1, i32:$src2)), addr:$dst),
            (VCVTPS2PHZmr addr:$dst, VR512:$src1, imm:$src2)>;
}

// 128/256-bit vcvtps2ph. These instructions are only defined under HasVLX, so
// the store patterns that select VCVTPS2PHZ128mr / VCVTPS2PHZ256mr are kept
// under the same predicate instead of the weaker HasAVX512 one.
let Predicates = [HasVLX] in {
  defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem,
                                       WriteCvtPS2PHY, WriteCvtPS2PHYSt>,
                                       EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
  defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem,
                                       WriteCvtPS2PH, WriteCvtPS2PHSt>,
                                       EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;

  // Storing element 0 of the 128-bit result viewed as f64 or as i64 selects
  // the memory-destination instruction directly.
  def : Pat<(store (f64 (extractelt
                         (bc_v2f64 (v8i16 (X86cvtps2ph VR128X:$src1, i32:$src2))),
                         (iPTR 0))), addr:$dst),
            (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, imm:$src2)>;
  def : Pat<(store (i64 (extractelt
                         (bc_v2i64 (v8i16 (X86cvtps2ph VR128X:$src1, i32:$src2))),
                         (iPTR 0))), addr:$dst),
            (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, imm:$src2)>;
  def : Pat<(store (v8i16 (X86cvtps2ph VR256X:$src1, i32:$src2)), addr:$dst),
            (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, imm:$src2)>;
}

// Patterns for matching conversions from float to half-float and vice versa.
let Predicates = [HasVLX] in {
  // Use MXCSR.RC for rounding instead of explicitly specifying the default
  // rounding mode (Nearest-Even, encoded as 0).
Both are equivalent in the 8651 // configurations we support (the default). However, falling back to MXCSR is 8652 // more consistent with other instructions, which are always controlled by it. 8653 // It's encoded as 0b100. 8654 def : Pat<(fp_to_f16 FR32X:$src), 8655 (i16 (EXTRACT_SUBREG (VMOVPDI2DIZrr (v8i16 (VCVTPS2PHZ128rr 8656 (v4f32 (COPY_TO_REGCLASS FR32X:$src, VR128X)), 4))), sub_16bit))>; 8657 8658 def : Pat<(f16_to_fp GR16:$src), 8659 (f32 (COPY_TO_REGCLASS (v4f32 (VCVTPH2PSZ128rr 8660 (v8i16 (COPY_TO_REGCLASS (MOVSX32rr16 GR16:$src), VR128X)))), FR32X)) >; 8661 8662 def : Pat<(f16_to_fp (i16 (fp_to_f16 FR32X:$src))), 8663 (f32 (COPY_TO_REGCLASS (v4f32 (VCVTPH2PSZ128rr 8664 (v8i16 (VCVTPS2PHZ128rr 8665 (v4f32 (COPY_TO_REGCLASS FR32X:$src, VR128X)), 4)))), FR32X)) >; 8666} 8667 8668// Unordered/Ordered scalar fp compare with Sae and set EFLAGS 8669multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _, 8670 string OpcodeStr, X86FoldableSchedWrite sched> { 8671 let hasSideEffects = 0 in 8672 def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2), 8673 !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"), []>, 8674 EVEX, EVEX_B, VEX_LIG, EVEX_V128, Sched<[sched]>; 8675} 8676 8677let Defs = [EFLAGS], Predicates = [HasAVX512] in { 8678 defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", WriteFCom>, 8679 AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>; 8680 defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", WriteFCom>, 8681 AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>; 8682 defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", WriteFCom>, 8683 AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>; 8684 defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", WriteFCom>, 8685 AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>; 8686} 8687 8688let Defs = [EFLAGS], Predicates = [HasAVX512] in { 8689 defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32, 8690 "ucomiss", WriteFCom>, PS, EVEX, 
VEX_LIG, 8691 EVEX_CD8<32, CD8VT1>; 8692 defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86cmp, f64, f64mem, loadf64, 8693 "ucomisd", WriteFCom>, PD, EVEX, 8694 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; 8695 let Pattern = []<dag> in { 8696 defm VCOMISSZ : sse12_ord_cmp<0x2F, FR32X, undef, f32, f32mem, loadf32, 8697 "comiss", WriteFCom>, PS, EVEX, VEX_LIG, 8698 EVEX_CD8<32, CD8VT1>; 8699 defm VCOMISDZ : sse12_ord_cmp<0x2F, FR64X, undef, f64, f64mem, loadf64, 8700 "comisd", WriteFCom>, PD, EVEX, 8701 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; 8702 } 8703 let isCodeGenOnly = 1 in { 8704 defm VUCOMISSZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem, 8705 sse_load_f32, "ucomiss", WriteFCom>, PS, EVEX, VEX_LIG, 8706 EVEX_CD8<32, CD8VT1>; 8707 defm VUCOMISDZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem, 8708 sse_load_f64, "ucomisd", WriteFCom>, PD, EVEX, 8709 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; 8710 8711 defm VCOMISSZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem, 8712 sse_load_f32, "comiss", WriteFCom>, PS, EVEX, VEX_LIG, 8713 EVEX_CD8<32, CD8VT1>; 8714 defm VCOMISDZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem, 8715 sse_load_f64, "comisd", WriteFCom>, PD, EVEX, 8716 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; 8717 } 8718} 8719 8720/// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd 8721multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode, 8722 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 8723 let Predicates = [HasAVX512], ExeDomain = _.ExeDomain in { 8724 defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 8725 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 8726 "$src2, $src1", "$src1, $src2", 8727 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>, 8728 EVEX_4V, VEX_LIG, Sched<[sched]>; 8729 defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), 8730 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr, 8731 "$src2, $src1", "$src1, $src2", 8732 (OpNode (_.VT _.RC:$src1), 8733 
_.ScalarIntMemCPat:$src2)>, EVEX_4V, VEX_LIG, 8734 Sched<[sched.Folded, sched.ReadAfterFold]>; 8735} 8736} 8737 8738defm VRCP14SSZ : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SchedWriteFRcp.Scl, 8739 f32x_info>, EVEX_CD8<32, CD8VT1>, 8740 T8PD; 8741defm VRCP14SDZ : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SchedWriteFRcp.Scl, 8742 f64x_info>, VEX_W, EVEX_CD8<64, CD8VT1>, 8743 T8PD; 8744defm VRSQRT14SSZ : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s, 8745 SchedWriteFRsqrt.Scl, f32x_info>, 8746 EVEX_CD8<32, CD8VT1>, T8PD; 8747defm VRSQRT14SDZ : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s, 8748 SchedWriteFRsqrt.Scl, f64x_info>, VEX_W, 8749 EVEX_CD8<64, CD8VT1>, T8PD; 8750 8751/// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd 8752multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode, 8753 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 8754 let ExeDomain = _.ExeDomain in { 8755 defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 8756 (ins _.RC:$src), OpcodeStr, "$src", "$src", 8757 (_.VT (OpNode _.RC:$src))>, EVEX, T8PD, 8758 Sched<[sched]>; 8759 defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 8760 (ins _.MemOp:$src), OpcodeStr, "$src", "$src", 8761 (OpNode (_.VT 8762 (bitconvert (_.LdFrag addr:$src))))>, EVEX, T8PD, 8763 Sched<[sched.Folded, sched.ReadAfterFold]>; 8764 defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 8765 (ins _.ScalarMemOp:$src), OpcodeStr, 8766 "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr, 8767 (OpNode (_.VT 8768 (X86VBroadcast (_.ScalarLdFrag addr:$src))))>, 8769 EVEX, T8PD, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 8770 } 8771} 8772 8773multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode, 8774 X86SchedWriteWidths sched> { 8775 defm PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"), OpNode, sched.ZMM, 8776 v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>; 8777 defm PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"), OpNode, sched.ZMM, 8778 
v8f64_info>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; 8779 8780 // Define only if AVX512VL feature is present. 8781 let Predicates = [HasVLX] in { 8782 defm PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"), 8783 OpNode, sched.XMM, v4f32x_info>, 8784 EVEX_V128, EVEX_CD8<32, CD8VF>; 8785 defm PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"), 8786 OpNode, sched.YMM, v8f32x_info>, 8787 EVEX_V256, EVEX_CD8<32, CD8VF>; 8788 defm PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"), 8789 OpNode, sched.XMM, v2f64x_info>, 8790 EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>; 8791 defm PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"), 8792 OpNode, sched.YMM, v4f64x_info>, 8793 EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>; 8794 } 8795} 8796 8797defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86rsqrt14, SchedWriteFRsqrt>; 8798defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86rcp14, SchedWriteFRcp>; 8799 8800/// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd 8801multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _, 8802 SDNode OpNode, SDNode OpNodeSAE, 8803 X86FoldableSchedWrite sched> { 8804 let ExeDomain = _.ExeDomain in { 8805 defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 8806 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 8807 "$src2, $src1", "$src1, $src2", 8808 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>, 8809 Sched<[sched]>; 8810 8811 defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 8812 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 8813 "{sae}, $src2, $src1", "$src1, $src2, {sae}", 8814 (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2))>, 8815 EVEX_B, Sched<[sched]>; 8816 8817 defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), 8818 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr, 8819 "$src2, $src1", "$src1, $src2", 8820 (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2)>, 8821 Sched<[sched.Folded, sched.ReadAfterFold]>; 8822 } 8823} 8824 8825multiclass 
avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode, 8826 SDNode OpNodeSAE, X86FoldableSchedWrite sched> { 8827 defm SSZ : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, OpNodeSAE, 8828 sched>, EVEX_CD8<32, CD8VT1>, VEX_LIG; 8829 defm SDZ : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, OpNodeSAE, 8830 sched>, EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W; 8831} 8832 8833let Predicates = [HasERI] in { 8834 defm VRCP28 : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, X86rcp28SAEs, 8835 SchedWriteFRcp.Scl>, T8PD, EVEX_4V; 8836 defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, X86rsqrt28SAEs, 8837 SchedWriteFRsqrt.Scl>, T8PD, EVEX_4V; 8838} 8839 8840defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs, 8841 SchedWriteFRnd.Scl>, T8PD, EVEX_4V; 8842/// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd 8843 8844multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, 8845 SDNode OpNode, X86FoldableSchedWrite sched> { 8846 let ExeDomain = _.ExeDomain in { 8847 defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 8848 (ins _.RC:$src), OpcodeStr, "$src", "$src", 8849 (OpNode (_.VT _.RC:$src))>, 8850 Sched<[sched]>; 8851 8852 defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 8853 (ins _.MemOp:$src), OpcodeStr, "$src", "$src", 8854 (OpNode (_.VT 8855 (bitconvert (_.LdFrag addr:$src))))>, 8856 Sched<[sched.Folded, sched.ReadAfterFold]>; 8857 8858 defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 8859 (ins _.ScalarMemOp:$src), OpcodeStr, 8860 "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr, 8861 (OpNode (_.VT 8862 (X86VBroadcast (_.ScalarLdFrag addr:$src))))>, 8863 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 8864 } 8865} 8866multiclass avx512_fp28_p_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, 8867 SDNode OpNode, X86FoldableSchedWrite sched> { 8868 let ExeDomain = _.ExeDomain in 8869 defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 8870 (ins _.RC:$src), 
OpcodeStr, 8871 "{sae}, $src", "$src, {sae}", 8872 (OpNode (_.VT _.RC:$src))>, 8873 EVEX_B, Sched<[sched]>; 8874} 8875 8876multiclass avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode, 8877 SDNode OpNodeSAE, X86SchedWriteWidths sched> { 8878 defm PSZ : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>, 8879 avx512_fp28_p_sae<opc, OpcodeStr#"ps", v16f32_info, OpNodeSAE, sched.ZMM>, 8880 T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>; 8881 defm PDZ : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>, 8882 avx512_fp28_p_sae<opc, OpcodeStr#"pd", v8f64_info, OpNodeSAE, sched.ZMM>, 8883 T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; 8884} 8885 8886multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr, 8887 SDNode OpNode, X86SchedWriteWidths sched> { 8888 // Define only if AVX512VL feature is present. 8889 let Predicates = [HasVLX] in { 8890 defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode, 8891 sched.XMM>, 8892 EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>; 8893 defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode, 8894 sched.YMM>, 8895 EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>; 8896 defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode, 8897 sched.XMM>, 8898 EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>; 8899 defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode, 8900 sched.YMM>, 8901 EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>; 8902 } 8903} 8904 8905let Predicates = [HasERI] in { 8906 defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, X86rsqrt28SAE, 8907 SchedWriteFRsqrt>, EVEX; 8908 defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28, X86rcp28SAE, 8909 SchedWriteFRcp>, EVEX; 8910 defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2, X86exp2SAE, 8911 SchedWriteFAdd>, EVEX; 8912} 8913defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE, 8914 SchedWriteFRnd>, 8915 avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexp, 8916 SchedWriteFRnd>, EVEX; 8917 8918multiclass 
avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr, 8919 X86FoldableSchedWrite sched, X86VectorVTInfo _>{ 8920 let ExeDomain = _.ExeDomain in 8921 defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 8922 (ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc", 8923 (_.VT (X86fsqrtRnd _.RC:$src, (i32 timm:$rc)))>, 8924 EVEX, EVEX_B, EVEX_RC, Sched<[sched]>; 8925} 8926 8927multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr, 8928 X86FoldableSchedWrite sched, X86VectorVTInfo _>{ 8929 let ExeDomain = _.ExeDomain in { 8930 defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 8931 (ins _.RC:$src), OpcodeStr, "$src", "$src", 8932 (_.VT (fsqrt _.RC:$src))>, EVEX, 8933 Sched<[sched]>; 8934 defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 8935 (ins _.MemOp:$src), OpcodeStr, "$src", "$src", 8936 (fsqrt (_.VT 8937 (bitconvert (_.LdFrag addr:$src))))>, EVEX, 8938 Sched<[sched.Folded, sched.ReadAfterFold]>; 8939 defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 8940 (ins _.ScalarMemOp:$src), OpcodeStr, 8941 "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr, 8942 (fsqrt (_.VT 8943 (X86VBroadcast (_.ScalarLdFrag addr:$src))))>, 8944 EVEX, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 8945 } 8946} 8947 8948multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr, 8949 X86SchedWriteSizes sched> { 8950 defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"), 8951 sched.PS.ZMM, v16f32_info>, 8952 EVEX_V512, PS, EVEX_CD8<32, CD8VF>; 8953 defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"), 8954 sched.PD.ZMM, v8f64_info>, 8955 EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>; 8956 // Define only if AVX512VL feature is present. 
8957 let Predicates = [HasVLX] in { 8958 defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"), 8959 sched.PS.XMM, v4f32x_info>, 8960 EVEX_V128, PS, EVEX_CD8<32, CD8VF>; 8961 defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"), 8962 sched.PS.YMM, v8f32x_info>, 8963 EVEX_V256, PS, EVEX_CD8<32, CD8VF>; 8964 defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"), 8965 sched.PD.XMM, v2f64x_info>, 8966 EVEX_V128, VEX_W, PD, EVEX_CD8<64, CD8VF>; 8967 defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"), 8968 sched.PD.YMM, v4f64x_info>, 8969 EVEX_V256, VEX_W, PD, EVEX_CD8<64, CD8VF>; 8970 } 8971} 8972 8973multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr, 8974 X86SchedWriteSizes sched> { 8975 defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"), 8976 sched.PS.ZMM, v16f32_info>, 8977 EVEX_V512, PS, EVEX_CD8<32, CD8VF>; 8978 defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"), 8979 sched.PD.ZMM, v8f64_info>, 8980 EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>; 8981} 8982 8983multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched, 8984 X86VectorVTInfo _, string Name> { 8985 let ExeDomain = _.ExeDomain in { 8986 defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 8987 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 8988 "$src2, $src1", "$src1, $src2", 8989 (X86fsqrts (_.VT _.RC:$src1), 8990 (_.VT _.RC:$src2))>, 8991 Sched<[sched]>; 8992 defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), 8993 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr, 8994 "$src2, $src1", "$src1, $src2", 8995 (X86fsqrts (_.VT _.RC:$src1), 8996 _.ScalarIntMemCPat:$src2)>, 8997 Sched<[sched.Folded, sched.ReadAfterFold]>; 8998 defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 8999 (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr, 9000 "$rc, $src2, $src1", "$src1, $src2, $rc", 9001 (X86fsqrtRnds (_.VT 
_.RC:$src1), 9002 (_.VT _.RC:$src2), 9003 (i32 timm:$rc))>, 9004 EVEX_B, EVEX_RC, Sched<[sched]>; 9005 9006 let isCodeGenOnly = 1, hasSideEffects = 0, Predicates=[HasAVX512] in { 9007 def r : I<opc, MRMSrcReg, (outs _.FRC:$dst), 9008 (ins _.FRC:$src1, _.FRC:$src2), 9009 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, 9010 Sched<[sched]>; 9011 let mayLoad = 1 in 9012 def m : I<opc, MRMSrcMem, (outs _.FRC:$dst), 9013 (ins _.FRC:$src1, _.ScalarMemOp:$src2), 9014 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, 9015 Sched<[sched.Folded, sched.ReadAfterFold]>; 9016 } 9017 } 9018 9019 let Predicates = [HasAVX512] in { 9020 def : Pat<(_.EltVT (fsqrt _.FRC:$src)), 9021 (!cast<Instruction>(Name#Zr) 9022 (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>; 9023 } 9024 9025 let Predicates = [HasAVX512, OptForSize] in { 9026 def : Pat<(_.EltVT (fsqrt (load addr:$src))), 9027 (!cast<Instruction>(Name#Zm) 9028 (_.EltVT (IMPLICIT_DEF)), addr:$src)>; 9029 } 9030} 9031 9032multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr, 9033 X86SchedWriteSizes sched> { 9034 defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", sched.PS.Scl, f32x_info, NAME#"SS">, 9035 EVEX_CD8<32, CD8VT1>, EVEX_4V, XS; 9036 defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", sched.PD.Scl, f64x_info, NAME#"SD">, 9037 EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W; 9038} 9039 9040defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, 9041 avx512_sqrt_packed_all_round<0x51, "vsqrt", SchedWriteFSqrtSizes>; 9042 9043defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, VEX_LIG; 9044 9045multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr, 9046 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 9047 let ExeDomain = _.ExeDomain in { 9048 defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 9049 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr, 9050 "$src3, $src2, $src1", "$src1, $src2, $src3", 9051 (_.VT (X86RndScales (_.VT 
_.RC:$src1), (_.VT _.RC:$src2), 9052 (i32 imm:$src3)))>, 9053 Sched<[sched]>; 9054 9055 defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 9056 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr, 9057 "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3", 9058 (_.VT (X86RndScalesSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2), 9059 (i32 imm:$src3)))>, EVEX_B, 9060 Sched<[sched]>; 9061 9062 defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), 9063 (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3), 9064 OpcodeStr, 9065 "$src3, $src2, $src1", "$src1, $src2, $src3", 9066 (_.VT (X86RndScales _.RC:$src1, 9067 _.ScalarIntMemCPat:$src2, (i32 imm:$src3)))>, 9068 Sched<[sched.Folded, sched.ReadAfterFold]>; 9069 9070 let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in { 9071 def r : I<opc, MRMSrcReg, (outs _.FRC:$dst), 9072 (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3), 9073 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 9074 []>, Sched<[sched]>; 9075 9076 let mayLoad = 1 in 9077 def m : I<opc, MRMSrcMem, (outs _.FRC:$dst), 9078 (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3), 9079 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 9080 []>, Sched<[sched.Folded, sched.ReadAfterFold]>; 9081 } 9082 } 9083 9084 let Predicates = [HasAVX512] in { 9085 def : Pat<(X86VRndScale _.FRC:$src1, imm:$src2), 9086 (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)), 9087 _.FRC:$src1, imm:$src2))>; 9088 } 9089 9090 let Predicates = [HasAVX512, OptForSize] in { 9091 def : Pat<(X86VRndScale (_.ScalarLdFrag addr:$src1), imm:$src2), 9092 (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)), 9093 addr:$src1, imm:$src2))>; 9094 } 9095} 9096 9097defm VRNDSCALESSZ : avx512_rndscale_scalar<0x0A, "vrndscaless", 9098 SchedWriteFRnd.Scl, f32x_info>, 9099 AVX512AIi8Base, EVEX_4V, VEX_LIG, 9100 EVEX_CD8<32, CD8VT1>; 9101 9102defm VRNDSCALESDZ : 
avx512_rndscale_scalar<0x0B, "vrndscalesd", 9103 SchedWriteFRnd.Scl, f64x_info>, 9104 VEX_W, AVX512AIi8Base, EVEX_4V, VEX_LIG, 9105 EVEX_CD8<64, CD8VT1>; 9106 9107multiclass avx512_masked_scalar<SDNode OpNode, string OpcPrefix, SDNode Move, 9108 dag Mask, X86VectorVTInfo _, PatLeaf ZeroFP, 9109 dag OutMask, Predicate BasePredicate> { 9110 let Predicates = [BasePredicate] in { 9111 def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects Mask, 9112 (OpNode (extractelt _.VT:$src2, (iPTR 0))), 9113 (extractelt _.VT:$dst, (iPTR 0))))), 9114 (!cast<Instruction>("V"#OpcPrefix#r_Intk) 9115 _.VT:$dst, OutMask, _.VT:$src2, _.VT:$src1)>; 9116 9117 def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects Mask, 9118 (OpNode (extractelt _.VT:$src2, (iPTR 0))), 9119 ZeroFP))), 9120 (!cast<Instruction>("V"#OpcPrefix#r_Intkz) 9121 OutMask, _.VT:$src2, _.VT:$src1)>; 9122 } 9123} 9124 9125defm : avx512_masked_scalar<fsqrt, "SQRTSSZ", X86Movss, 9126 (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v4f32x_info, 9127 fp32imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>; 9128defm : avx512_masked_scalar<fsqrt, "SQRTSDZ", X86Movsd, 9129 (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v2f64x_info, 9130 fp64imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>; 9131 9132 9133//------------------------------------------------- 9134// Integer truncate and extend operations 9135//------------------------------------------------- 9136 9137// PatFrags that contain a select and a truncate op. The take operands in the 9138// same order as X86vmtrunc, X86vmtruncs, X86vmtruncus. This allows us to pass 9139// either to the multiclasses. 
// select_trunc: vselect on $mask between (trunc $src) and the pass-through
// value $src0.
def select_trunc : PatFrag<(ops node:$src, node:$src0, node:$mask),
                           (vselect node:$mask,
                                    (trunc node:$src), node:$src0)>;
// select_truncs: same shape as select_trunc, built on X86vtruncs.
def select_truncs : PatFrag<(ops node:$src, node:$src0, node:$mask),
                            (vselect node:$mask,
                                     (X86vtruncs node:$src), node:$src0)>;
// select_truncus: same shape as select_trunc, built on X86vtruncus.
def select_truncus : PatFrag<(ops node:$src, node:$src0, node:$mask),
                             (vselect node:$mask,
                                      (X86vtruncus node:$src), node:$src0)>;

// Defines the five forms of one truncating move from SrcInfo to DestInfo:
//   rr   - register form, selected from OpNode.
//   rrk  - masked register form (EVEX_K) with $src0 tied to $dst; selected
//          from MaskNode with DestInfo.RC:$src0 as the pass-through operand.
//   rrkz - masked register form (EVEX_KZ, "{z}" in the asm string); selected
//          from MaskNode with DestInfo.ImmAllZerosV as the pass-through.
//   mr   - store of SrcInfo.RC:$src to x86memop:$dst.
//   mrk  - masked store (EVEX_K), additionally marked NotMemoryFoldable.
// The mask operand always uses the source type's write-mask class
// (SrcInfo.KRCWM). ExeDomain is taken from DestInfo for every form.
multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               SDPatternOperator MaskNode,
                               X86FoldableSchedWrite sched, X86VectorVTInfo SrcInfo,
                               X86VectorVTInfo DestInfo, X86MemOperand x86memop> {
  let ExeDomain = DestInfo.ExeDomain in {
  def rr : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
             (ins SrcInfo.RC:$src),
             OpcodeStr # "\t{$src, $dst|$dst, $src}",
             [(set DestInfo.RC:$dst,
                   (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src))))]>,
             EVEX, Sched<[sched]>;
  let Constraints = "$src0 = $dst" in
  def rrk : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
             (ins DestInfo.RC:$src0, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
             OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
             [(set DestInfo.RC:$dst,
                   (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
                             (DestInfo.VT DestInfo.RC:$src0),
                             SrcInfo.KRCWM:$mask))]>,
             EVEX, EVEX_K, Sched<[sched]>;
  def rrkz : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
             (ins SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
             OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
             [(set DestInfo.RC:$dst,
                   (DestInfo.VT (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
                             DestInfo.ImmAllZerosV, SrcInfo.KRCWM:$mask)))]>,
             EVEX, EVEX_KZ, Sched<[sched]>;
  }

  // The store forms carry an empty pattern list; mayStore and hasSideEffects
  // are given explicitly on the enclosing let.
  let mayStore = 1, hasSideEffects = 0, ExeDomain = DestInfo.ExeDomain in {
    def mr : AVX512XS8I<opc, MRMDestMem, (outs),
               (ins x86memop:$dst, SrcInfo.RC:$src),
               OpcodeStr # "\t{$src, $dst|$dst, $src}", []>,
               EVEX, Sched<[sched.Folded]>;

    def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
               (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
               OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", []>,
               EVEX, EVEX_K, Sched<[sched.Folded]>, NotMemoryFoldable;
  }//mayStore = 1, hasSideEffects = 0
}

// Selection patterns for the store forms. truncFrag / mtruncFrag applied to a
// SrcInfo register and addr:$dst are mapped to the instruction records named
// Name # SrcInfo.ZSuffix # "mr" and Name # SrcInfo.ZSuffix # "mrk".
// DestInfo is accepted but not referenced in the body.
multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
                                    X86VectorVTInfo DestInfo,
                                    PatFrag truncFrag, PatFrag mtruncFrag,
                                    string Name> {

  def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst),
            (!cast<Instruction>(Name#SrcInfo.ZSuffix##mr)
                                    addr:$dst, SrcInfo.RC:$src)>;

  def : Pat<(mtruncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst,
                        SrcInfo.KRCWM:$mask),
            (!cast<Instruction>(Name#SrcInfo.ZSuffix##mrk)
                            addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
}

// Instantiates avx512_trunc_common together with avx512_trunc_mr_lowering for
// the three source widths of VTSrcInfo:
//   Z128 and Z256 under Predicates = [HasVLX, prd],
//   Z             under Predicates = [prd].
// Each width receives its own unmasked node, masked node, destination info and
// memory operand; truncFrag / mtruncFrag are shared. prd defaults to HasAVX512.
multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128,
                        SDNode OpNode256, SDNode OpNode512,
                        SDPatternOperator MaskNode128,
                        SDPatternOperator MaskNode256,
                        SDPatternOperator MaskNode512,
                        X86FoldableSchedWrite sched,
                        AVX512VLVectorVTInfo VTSrcInfo,
                        X86VectorVTInfo DestInfoZ128,
                        X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
                        X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
                        X86MemOperand x86memopZ, PatFrag truncFrag,
                        PatFrag mtruncFrag, Predicate prd = HasAVX512>{

  let Predicates = [HasVLX, prd] in {
    defm Z128:  avx512_trunc_common<opc, OpcodeStr, OpNode128, MaskNode128, sched,
                             VTSrcInfo.info128, DestInfoZ128, x86memopZ128>,
                avx512_trunc_mr_lowering<VTSrcInfo.info128, DestInfoZ128,
                             truncFrag, mtruncFrag, NAME>, EVEX_V128;

    defm Z256:  avx512_trunc_common<opc, OpcodeStr, OpNode256, MaskNode256, sched,
                             VTSrcInfo.info256, DestInfoZ256, x86memopZ256>,
                avx512_trunc_mr_lowering<VTSrcInfo.info256, DestInfoZ256,
                             truncFrag, mtruncFrag, NAME>, EVEX_V256;
  }
  let Predicates = [prd] in
    defm Z:     avx512_trunc_common<opc, OpcodeStr, OpNode512, MaskNode512, sched,
                             VTSrcInfo.info512, DestInfoZ, x86memopZ>,
                avx512_trunc_mr_lowering<VTSrcInfo.info512, DestInfoZ,
                             truncFrag, mtruncFrag, NAME>, EVEX_V512;
}

// i64 -> i8 truncation. All three widths use InVecNode / InVecMaskNode and a
// v16i8 destination; OpNode and MaskNode are accepted but not referenced in
// the body. Memory operands: i16mem, i32mem, i64mem.
multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, InVecNode,
                          InVecMaskNode, InVecMaskNode, InVecMaskNode, sched,
                          avx512vl_i64_info, v16i8x_info, v16i8x_info,
                          v16i8x_info, i16mem, i32mem, i64mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<8, CD8VO>;
}

// i64 -> i16 truncation. The 128- and 256-bit forms use InVecNode /
// InVecMaskNode; the 512-bit form uses OpNode / MaskNode. The destination is
// v8i16 at every width. Memory operands: i32mem, i64mem, i128mem.
multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
                          InVecMaskNode, InVecMaskNode, MaskNode, sched,
                          avx512vl_i64_info, v8i16x_info, v8i16x_info,
                          v8i16x_info, i32mem, i64mem, i128mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<16, CD8VQ>;
}

// i64 -> i32 truncation. Only the 128-bit form uses InVecNode /
// InVecMaskNode; the 256- and 512-bit forms use OpNode / MaskNode.
// Destinations: v4i32, v4i32, v8i32. Memory operands: i64mem, i128mem, i256mem.
multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
                          InVecMaskNode, MaskNode, MaskNode, sched,
                          avx512vl_i64_info, v4i32x_info, v4i32x_info,
                          v8i32x_info, i64mem, i128mem, i256mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<32, CD8VH>;
}

// i32 -> i8 truncation. The 128- and 256-bit forms use InVecNode /
// InVecMaskNode; the 512-bit form uses OpNode / MaskNode. The destination is
// v16i8 at every width. Memory operands: i32mem, i64mem, i128mem.
multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
                          InVecMaskNode, InVecMaskNode, MaskNode, sched,
                          avx512vl_i32_info, v16i8x_info, v16i8x_info,
                          v16i8x_info, i32mem, i64mem, i128mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<8, CD8VQ>;
}

// i32 -> i16 truncation. Only the 128-bit form uses InVecNode /
// InVecMaskNode; the 256- and 512-bit forms use OpNode / MaskNode.
// Destinations: v8i16, v8i16, v16i16. Memory operands: i64mem, i128mem,
// i256mem.
multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
                          InVecMaskNode, MaskNode, MaskNode, sched,
                          avx512vl_i32_info, v8i16x_info, v8i16x_info,
                          v16i16x_info, i64mem, i128mem, i256mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<16, CD8VH>;
}

// i16 -> i8 truncation. Only the 128-bit form uses InVecNode /
// InVecMaskNode; the 256- and 512-bit forms use OpNode / MaskNode.
// Destinations: v16i8, v16i8, v32i8. Passes HasBWI as the base predicate
// instead of the HasAVX512 default.
multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
                          InVecMaskNode, MaskNode, MaskNode, sched,
                          avx512vl_i16_info, v16i8x_info, v16i8x_info,
                          v32i8x_info, i64mem, i128mem, i256mem, StoreNode,
                          MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>;
}

// Each group below instantiates one multiclass three times: with trunc /
// select_trunc, with X86vtruncs / select_truncs, and with X86vtruncus /
// select_truncus, each paired with the matching truncstore fragments.

// avx512_trunc_qb: opcodes 0x32, 0x22, 0x12.
defm VPMOVQB    : avx512_trunc_qb<0x32, "vpmovqb",   trunc, select_trunc,
                                  WriteShuffle256, truncstorevi8,
                                  masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
defm VPMOVSQB   : avx512_trunc_qb<0x22, "vpmovsqb",  X86vtruncs, select_truncs,
                                  WriteShuffle256, truncstore_s_vi8,
                                  masked_truncstore_s_vi8, X86vtruncs,
                                  X86vmtruncs>;
defm VPMOVUSQB  : avx512_trunc_qb<0x12, "vpmovusqb", X86vtruncus,
                                  select_truncus, WriteShuffle256,
                                  truncstore_us_vi8, masked_truncstore_us_vi8,
                                  X86vtruncus, X86vmtruncus>;

// avx512_trunc_qw: opcodes 0x34, 0x24, 0x14.
defm VPMOVQW    : avx512_trunc_qw<0x34, "vpmovqw", trunc, select_trunc,
                                  WriteShuffle256, truncstorevi16,
                                  masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
defm VPMOVSQW   : avx512_trunc_qw<0x24, "vpmovsqw",  X86vtruncs, select_truncs,
                                  WriteShuffle256, truncstore_s_vi16,
                                  masked_truncstore_s_vi16, X86vtruncs,
                                  X86vmtruncs>;
defm VPMOVUSQW  : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus,
                                  select_truncus, WriteShuffle256,
                                  truncstore_us_vi16, masked_truncstore_us_vi16,
                                  X86vtruncus, X86vmtruncus>;

// avx512_trunc_qd: opcodes 0x35, 0x25, 0x15.
defm VPMOVQD    : avx512_trunc_qd<0x35, "vpmovqd", trunc, select_trunc,
                                  WriteShuffle256, truncstorevi32,
                                  masked_truncstorevi32, X86vtrunc, X86vmtrunc>;
defm VPMOVSQD   : avx512_trunc_qd<0x25, "vpmovsqd",  X86vtruncs, select_truncs,
                                  WriteShuffle256, truncstore_s_vi32,
                                  masked_truncstore_s_vi32, X86vtruncs,
                                  X86vmtruncs>;
defm VPMOVUSQD  : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus,
                                  select_truncus, WriteShuffle256,
                                  truncstore_us_vi32, masked_truncstore_us_vi32,
                                  X86vtruncus, X86vmtruncus>;

// avx512_trunc_db: opcodes 0x31, 0x21, 0x11.
defm VPMOVDB    : avx512_trunc_db<0x31, "vpmovdb", trunc, select_trunc,
                                  WriteShuffle256, truncstorevi8,
                                  masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
defm VPMOVSDB   : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, select_truncs,
                                  WriteShuffle256, truncstore_s_vi8,
                                  masked_truncstore_s_vi8, X86vtruncs,
                                  X86vmtruncs>;
defm VPMOVUSDB  : avx512_trunc_db<0x11, "vpmovusdb", X86vtruncus,
                                  select_truncus, WriteShuffle256,
                                  truncstore_us_vi8, masked_truncstore_us_vi8,
                                  X86vtruncus, X86vmtruncus>;

// avx512_trunc_dw: opcodes 0x33 and 0x23 here.
defm VPMOVDW    : avx512_trunc_dw<0x33, "vpmovdw", trunc, select_trunc,
                                  WriteShuffle256, truncstorevi16,
                                  masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
defm VPMOVSDW   : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, select_truncs,
                                  WriteShuffle256, truncstore_s_vi16,
                                  masked_truncstore_s_vi16, X86vtruncs,
                                  X86vmtruncs>;
// avx512_trunc_dw instantiated with X86vtruncus / select_truncus, opcode 0x13.
defm VPMOVUSDW  : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus,
                                  select_truncus, WriteShuffle256,
                                  truncstore_us_vi16, masked_truncstore_us_vi16,
                                  X86vtruncus, X86vmtruncus>;

// avx512_trunc_wb instantiated three times (opcodes 0x30, 0x20, 0x10): with
// trunc / select_trunc, X86vtruncs / select_truncs and X86vtruncus /
// select_truncus.
defm VPMOVWB    : avx512_trunc_wb<0x30, "vpmovwb", trunc, select_trunc,
                                  WriteShuffle256, truncstorevi8,
                                  masked_truncstorevi8, X86vtrunc,
                                  X86vmtrunc>;
defm VPMOVSWB   : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, select_truncs,
                                  WriteShuffle256, truncstore_s_vi8,
                                  masked_truncstore_s_vi8, X86vtruncs,
                                  X86vmtruncs>;
defm VPMOVUSWB  : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus,
                                  select_truncus, WriteShuffle256,
                                  truncstore_us_vi8, masked_truncstore_us_vi8,
                                  X86vtruncus, X86vmtruncus>;

// With AVX512 but without VLX, a trunc of a 256-bit register is selected by
// widening: the source is inserted at sub_ymm into an IMPLICIT_DEF 512-bit
// value, the 512-bit Zrr instruction is applied, and sub_xmm of the result is
// extracted.
let Predicates = [HasAVX512, NoVLX] in {
def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))),
         (v8i16 (EXTRACT_SUBREG
                 (v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                          VR256X:$src, sub_ymm)))), sub_xmm))>;
def: Pat<(v4i32 (trunc (v4i64 VR256X:$src))),
         (v4i32 (EXTRACT_SUBREG
                 (v8i32 (VPMOVQDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                           VR256X:$src, sub_ymm)))), sub_xmm))>;
}

// Same widening scheme for v16i16 -> v16i8 through VPMOVWBZrr, which is
// predicated on HasBWI rather than HasAVX512.
let Predicates = [HasBWI, NoVLX] in {
def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
         (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF),
                                            VR256X:$src, sub_ymm))), sub_xmm))>;
}

// Without BWI we can't use vXi16/vXi8 vselect so we have to use vmtrunc nodes.
// Two selection patterns for a masked truncate node OpNode taking
// (source, pass-through, mask):
//   pass-through DestInfo.RC:$src0      -> InstrName # "rrk"
//   pass-through DestInfo.ImmAllZerosV  -> InstrName # "rrkz"
multiclass mtrunc_lowering<string InstrName, SDNode OpNode,
                           X86VectorVTInfo DestInfo,
                           X86VectorVTInfo SrcInfo> {
  def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
                                 DestInfo.RC:$src0,
                                 SrcInfo.KRCWM:$mask)),
            (!cast<Instruction>(InstrName#"rrk") DestInfo.RC:$src0,
                                                 SrcInfo.KRCWM:$mask,
                                                 SrcInfo.RC:$src)>;

  def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
                                 DestInfo.ImmAllZerosV,
                                 SrcInfo.KRCWM:$mask)),
            (!cast<Instruction>(InstrName#"rrkz") SrcInfo.KRCWM:$mask,
                                                  SrcInfo.RC:$src)>;
}

// v8i32 -> v8i16 masked truncates on the 256-bit instructions.
let Predicates = [HasVLX] in {
defm : mtrunc_lowering<"VPMOVDWZ256", X86vmtrunc, v8i16x_info, v8i32x_info>;
defm : mtrunc_lowering<"VPMOVSDWZ256", X86vmtruncs, v8i16x_info, v8i32x_info>;
defm : mtrunc_lowering<"VPMOVUSDWZ256", X86vmtruncus, v8i16x_info, v8i32x_info>;
}

// Masked truncates on the 512-bit instructions: v16i32 -> v16i16,
// v16i32 -> v16i8 and v8i64 -> v8i16.
let Predicates = [HasAVX512] in {
defm : mtrunc_lowering<"VPMOVDWZ", X86vmtrunc, v16i16x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVSDWZ", X86vmtruncs, v16i16x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVUSDWZ", X86vmtruncus, v16i16x_info, v16i32_info>;

defm : mtrunc_lowering<"VPMOVDBZ", X86vmtrunc, v16i8x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVSDBZ", X86vmtruncs, v16i8x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVUSDBZ", X86vmtruncus, v16i8x_info, v16i32_info>;

defm : mtrunc_lowering<"VPMOVQWZ", X86vmtrunc, v8i16x_info, v8i64_info>;
defm : mtrunc_lowering<"VPMOVSQWZ", X86vmtruncs, v8i16x_info, v8i64_info>;
defm : mtrunc_lowering<"VPMOVUSQWZ", X86vmtruncus, v8i16x_info, v8i64_info>;
}

// Register (rr) and load (rm) forms of one extending move, both built through
// AVX512_maskable on DestInfo:
//   rr - pattern is OpNode applied to a SrcInfo register.
//   rm - pattern is LdFrag applied to addr:$src; OpNode is not used here.
multiclass WriteShuffle256_common<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
              X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
              X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode>{
  let ExeDomain = DestInfo.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
                    (ins SrcInfo.RC:$src), OpcodeStr ,"$src", "$src",
                    (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src)))>,
                  EVEX, Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
                  (ins x86memop:$src), OpcodeStr ,"$src", "$src",
                  (DestInfo.VT (LdFrag addr:$src))>,
                EVEX, Sched<[sched.Folded]>;
  }
}

// i8 -> i16 extension. Z128 uses InVecNode (v16i8 source, i64mem); Z256 and Z
// use OpNode (v16i8 / v32i8 sources, i128mem / i256mem). Z128 and Z256 need
// HasVLX and HasBWI, Z needs HasBWI. LdFrag defaults to
// ExtTy # "extloadvi8".
multiclass WriteShuffle256_BW<bits<8> opc, string OpcodeStr,
          SDNode OpNode, SDNode InVecNode, string ExtTy,
          X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasBWI] in {
    defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v8i16x_info,
                    v16i8x_info, i64mem, LdFrag, InVecNode>,
                     EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v16i16x_info,
                    v16i8x_info, i128mem, LdFrag, OpNode>,
                     EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasBWI] in {
    defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v32i16_info,
                    v32i8x_info, i256mem, LdFrag, OpNode>,
                     EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
  }
}

// i8 -> i32 extension. Z128 and Z256 use InVecNode (i32mem / i64mem); Z uses
// OpNode (i128mem). The source info is v16i8 at every width. LdFrag defaults
// to ExtTy # "extloadvi8".
multiclass WriteShuffle256_BD<bits<8> opc, string OpcodeStr,
          SDNode OpNode, SDNode InVecNode, string ExtTy,
          X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i32x_info,
                   v16i8x_info, i32mem, LdFrag, InVecNode>,
                         EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info,
                   v16i8x_info, i64mem, LdFrag, InVecNode>,
                         EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v16i32_info,
                   v16i8x_info, i128mem, LdFrag, OpNode>,
                         EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
  }
}

// i8 -> i64 extension. All three widths use InVecNode with a v16i8 source;
// OpNode is accepted but not referenced in the body. Memory operands: i16mem,
// i32mem, i64mem. LdFrag defaults to ExtTy # "extloadvi8".
multiclass WriteShuffle256_BQ<bits<8> opc, string OpcodeStr,
          SDNode OpNode, SDNode InVecNode, string ExtTy,
          X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
                   v16i8x_info, i16mem, LdFrag, InVecNode>,
                     EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
                   v16i8x_info, i32mem, LdFrag, InVecNode>,
                     EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
                   v16i8x_info, i64mem, LdFrag, InVecNode>,
                     EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512, VEX_WIG;
  }
}

// i16 -> i32 extension. Z128 uses InVecNode (v8i16 source, i64mem); Z256 and
// Z use OpNode (v8i16 / v16i16 sources, i128mem / i256mem). LdFrag defaults
// to ExtTy # "extloadvi16".
multiclass WriteShuffle256_WD<bits<8> opc, string OpcodeStr,
         SDNode OpNode, SDNode InVecNode, string ExtTy,
         X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i32x_info,
                   v8i16x_info, i64mem, LdFrag, InVecNode>,
                     EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info,
                   v8i16x_info, i128mem, LdFrag, OpNode>,
                     EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v16i32_info,
                   v16i16x_info, i256mem, LdFrag, OpNode>,
                     EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
  }
}

// i16 -> i64 extension. Z128 and Z256 use InVecNode (i32mem / i64mem); Z uses
// OpNode (i128mem). The source info is v8i16 at every width. LdFrag defaults
// to ExtTy # "extloadvi16".
multiclass WriteShuffle256_WQ<bits<8> opc, string OpcodeStr,
         SDNode OpNode, SDNode InVecNode, string ExtTy,
         X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
                   v8i16x_info, i32mem, LdFrag, InVecNode>,
                     EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
                   v8i16x_info, i64mem, LdFrag, InVecNode>,
                     EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
                   v8i16x_info, i128mem, LdFrag, OpNode>,
                     EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
  }
}

// i32 -> i64 extension. Z128 uses InVecNode (v4i32 source, i64mem); Z256 and
// Z use OpNode (v4i32 / v8i32 sources, i128mem / i256mem). Unlike the
// byte/word variants above, no VEX_WIG is applied. LdFrag defaults to
// ExtTy # "extloadvi32".
multiclass WriteShuffle256_DQ<bits<8> opc, string OpcodeStr,
         SDNode OpNode, SDNode InVecNode, string ExtTy,
         X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {

  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
                   v4i32x_info, i64mem, LdFrag, InVecNode>,
                     EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128;

    defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
                   v4i32x_info, i128mem, LdFrag, OpNode>,
                     EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256;
  }
  let Predicates = [HasAVX512] in {
    defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
                   v8i32x_info, i256mem, LdFrag, OpNode>,
                     EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512;
  }
}

// zext / zext_invec instantiations, ExtTy "z", opcodes 0x30-0x35.
defm VPMOVZXBW : WriteShuffle256_BW<0x30, "vpmovzxbw", zext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXBD : WriteShuffle256_BD<0x31, "vpmovzxbd", zext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXBQ : WriteShuffle256_BQ<0x32, "vpmovzxbq", zext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXWD : WriteShuffle256_WD<0x33, "vpmovzxwd", zext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXWQ : WriteShuffle256_WQ<0x34, "vpmovzxwq", zext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXDQ : WriteShuffle256_DQ<0x35, "vpmovzxdq", zext, zext_invec, "z", WriteShuffle256>;

// sext / sext_invec instantiations, ExtTy "s", opcodes 0x20-0x25.
defm VPMOVSXBW: WriteShuffle256_BW<0x20, "vpmovsxbw", sext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXBD: WriteShuffle256_BD<0x21, "vpmovsxbd", sext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXBQ: WriteShuffle256_BQ<0x22, "vpmovsxbq", sext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXWD: WriteShuffle256_WD<0x23, "vpmovsxwd", sext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXWQ: WriteShuffle256_WQ<0x24, "vpmovsxwq", sext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXDQ: WriteShuffle256_DQ<0x25, "vpmovsxdq", sext, sext_invec, "s", WriteShuffle256>;


// Patterns that we also need any extend versions of. aext_vector_inreg
// is currently legalized to zext_vector_inreg.
// Each pattern folds a full-vector load under ExtOp into the rm form named
// OpcPrefix # <size suffix> # "rm".
multiclass AVX512_pmovx_patterns_base<string OpcPrefix, SDNode ExtOp> {
  // 256-bit patterns
  let Predicates = [HasVLX, HasBWI] in {
    def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))),
              (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
  }

  let Predicates = [HasVLX] in {
    def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))),
              (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;

    def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
              (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
  }

  // 512-bit patterns
  let Predicates = [HasBWI] in {
    def : Pat<(v32i16 (ExtOp (loadv32i8 addr:$src))),
              (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
  }
  let Predicates = [HasAVX512] in {
    def : Pat<(v16i32 (ExtOp (loadv16i8 addr:$src))),
              (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;
    def : Pat<(v16i32 (ExtOp (loadv16i16 addr:$src))),
              (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;

    def : Pat<(v8i64 (ExtOp (loadv8i16 addr:$src))),
              (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;

    def : Pat<(v8i64 (ExtOp (loadv8i32 addr:$src))),
              (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
  }
}

// Adds, on top of AVX512_pmovx_patterns_base, patterns where InVecOp is
// applied to a bitcast of a narrower load. The load appears either as
// scalar_to_vector of a scalar load (loadi64 / loadf64 / loadi32 /
// extloadi32i16) or as X86vzload64 / X86vzload32; all map to the same rm
// instruction.
multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
                                 SDNode InVecOp> :
    AVX512_pmovx_patterns_base<OpcPrefix, ExtOp> {
  // 128-bit patterns
  let Predicates = [HasVLX, HasBWI] in {
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  }
  let Predicates = [HasVLX] in {
  def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;

  def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;

  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;

  def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (X86vzload32 addr:$src))))),
            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;

  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  }
  // 256-bit results produced from a 128-bit (bitcast) source.
  let Predicates = [HasVLX] in {
  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;

  def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;

  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  }
  // 512-bit patterns
  let Predicates = [HasAVX512] in {
  def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
  }
}

defm : AVX512_pmovx_patterns<"VPMOVSX", sext, sext_invec>;
defm : AVX512_pmovx_patterns<"VPMOVZX", zext, zext_invec>;

// Without BWI we can't do a trunc from v16i16 to v16i8. DAG combine can merge
// ext+trunc aggressively making it impossible to legalize the DAG to this
// pattern directly.
// The patterns below instead extend to v16i32 with VPMOVZXWDZ (register or
// load form) and then truncate with VPMOVDBZrr.
let Predicates = [HasAVX512, NoBWI] in {
def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
         (VPMOVDBZrr (v16i32 (VPMOVZXWDZrr VR256X:$src)))>;
def: Pat<(v16i8 (trunc (loadv16i16 addr:$src))),
         (VPMOVDBZrr (v16i32 (VPMOVZXWDZrm addr:$src)))>;
}

//===----------------------------------------------------------------------===//
// GATHER - SCATTER Operations

// FIXME: Improve scheduling of gather/scatter instructions.
// Overview of the gather/scatter, mask<->vector move and COMPRESS definitions
// below:
//  - avx512_gather emits one "rm" instruction. Its constraint string marks
//    $dst as early-clobber and ties $src1 to $dst and $mask to $mask_wb, so the
//    write-mask is both an input and an output. avx512_scatter emits the
//    matching "mr" store form (mayStore = 1) with the same $mask/$mask_wb tie.
//  - The *_q_pd / *_d_ps wrappers instantiate the dword-index (D) and
//    qword-index (Q) variants; the Z256/Z128 forms are wrapped in HasVLX, the
//    Z forms get no additional predicate inside these wrappers.
//  - avx512_gather_scatter_prefetch emits a pattern-less "m" instruction under
//    HasPFI, flagged both mayLoad and mayStore.
//  - cvt_by_vec_width / cvt_mask_by_elt_width (VPMOVM2*) select sext of a mask
//    register and add an anyext pattern that reuses the same instruction.
//  - convert_vector_to_mask_* (VPMOV*2M) match X86pcmpgtm of all-zeros against
//    the source; under NoVLX the 128/256-bit cases are widened with
//    INSERT_SUBREG so that the 512-bit instruction can be used.
//  - compress_by_vec_width_common leaves rr/mr/mrk without inline patterns
//    (null_frag / []); compress_by_vec_width_lowering supplies the selection
//    patterns for X86mCompressingStore and X86compress.
9700multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, 9701 X86MemOperand memop, PatFrag GatherNode, 9702 RegisterClass MaskRC = _.KRCWM> { 9703 let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb", 9704 ExeDomain = _.ExeDomain in 9705 def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, MaskRC:$mask_wb), 9706 (ins _.RC:$src1, MaskRC:$mask, memop:$src2), 9707 !strconcat(OpcodeStr#_.Suffix, 9708 "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"), 9709 [(set _.RC:$dst, MaskRC:$mask_wb, 9710 (GatherNode (_.VT _.RC:$src1), MaskRC:$mask, 9711 vectoraddr:$src2))]>, EVEX, EVEX_K, 9712 EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteLoad]>; 9713} 9714 9715multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc, 9716 AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> { 9717 defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512, 9718 vy512xmem, mgatherv8i32>, EVEX_V512, VEX_W; 9719 defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info512, 9720 vz512mem, mgatherv8i64>, EVEX_V512, VEX_W; 9721let Predicates = [HasVLX] in { 9722 defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256, 9723 vx256xmem, mgatherv4i32>, EVEX_V256, VEX_W; 9724 defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info256, 9725 vy256xmem, mgatherv4i64>, EVEX_V256, VEX_W; 9726 defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128, 9727 vx128xmem, mgatherv4i32>, EVEX_V128, VEX_W; 9728 defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128, 9729 vx128xmem, mgatherv2i64>, EVEX_V128, VEX_W; 9730} 9731} 9732 9733multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc, 9734 AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> { 9735 defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512, vz512mem, 9736 mgatherv16i32>, EVEX_V512; 9737 defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info256, vz256mem, 9738 mgatherv8i64>, EVEX_V512; 9739let Predicates = 
[HasVLX] in { 9740 defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256, 9741 vy256xmem, mgatherv8i32>, EVEX_V256; 9742 defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info128, 9743 vy128xmem, mgatherv4i64>, EVEX_V256; 9744 defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128, 9745 vx128xmem, mgatherv4i32>, EVEX_V128; 9746 defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128, 9747 vx64xmem, mgatherv2i64, VK2WM>, 9748 EVEX_V128; 9749} 9750} 9751 9752 9753defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">, 9754 avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">; 9755 9756defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">, 9757 avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">; 9758 9759multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, 9760 X86MemOperand memop, PatFrag ScatterNode, 9761 RegisterClass MaskRC = _.KRCWM> { 9762 9763let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain in 9764 9765 def mr : AVX5128I<opc, MRMDestMem, (outs MaskRC:$mask_wb), 9766 (ins memop:$dst, MaskRC:$mask, _.RC:$src), 9767 !strconcat(OpcodeStr#_.Suffix, 9768 "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"), 9769 [(set MaskRC:$mask_wb, (ScatterNode (_.VT _.RC:$src), 9770 MaskRC:$mask, vectoraddr:$dst))]>, 9771 EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>, 9772 Sched<[WriteStore]>; 9773} 9774 9775multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc, 9776 AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> { 9777 defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512, 9778 vy512xmem, mscatterv8i32>, EVEX_V512, VEX_W; 9779 defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info512, 9780 vz512mem, mscatterv8i64>, EVEX_V512, VEX_W; 9781let Predicates = [HasVLX] in { 9782 defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256, 
9783 vx256xmem, mscatterv4i32>, EVEX_V256, VEX_W; 9784 defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info256, 9785 vy256xmem, mscatterv4i64>, EVEX_V256, VEX_W; 9786 defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128, 9787 vx128xmem, mscatterv4i32>, EVEX_V128, VEX_W; 9788 defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128, 9789 vx128xmem, mscatterv2i64>, EVEX_V128, VEX_W; 9790} 9791} 9792 9793multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc, 9794 AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> { 9795 defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512, vz512mem, 9796 mscatterv16i32>, EVEX_V512; 9797 defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info256, vz256mem, 9798 mscatterv8i64>, EVEX_V512; 9799let Predicates = [HasVLX] in { 9800 defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256, 9801 vy256xmem, mscatterv8i32>, EVEX_V256; 9802 defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info128, 9803 vy128xmem, mscatterv4i64>, EVEX_V256; 9804 defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128, 9805 vx128xmem, mscatterv4i32>, EVEX_V128; 9806 defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128, 9807 vx64xmem, mscatterv2i64, VK2WM>, 9808 EVEX_V128; 9809} 9810} 9811 9812defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">, 9813 avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">; 9814 9815defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">, 9816 avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">; 9817 9818// Gather/scatter prefetch (guarded by HasPFI) 9819multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr, 9820 RegisterClass KRC, X86MemOperand memop> { 9821 let Predicates = [HasPFI], mayLoad = 1, mayStore = 1 in 9822 def m : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src), 
!strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"), []>, 9824 EVEX, EVEX_K, Sched<[WriteLoad]>; 9825} 9826 9827defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps", 9828 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>; 9829 9830defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps", 9831 VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>; 9832 9833defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd", 9834 VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>; 9835 9836defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd", 9837 VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; 9838 9839defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps", 9840 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>; 9841 9842defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps", 9843 VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>; 9844 9845defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd", 9846 VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>; 9847 9848defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd", 9849 VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; 9850 9851defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps", 9852 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>; 9853 9854defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps", 9855 VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>; 9856 9857defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd", 9858 VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>; 9859 9860defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd", 9861 VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; 9862 9863defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, 
"vscatterpf1dps", 9864 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>; 9865 9866defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps", 9867 VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>; 9868 9869defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd", 9870 VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>; 9871 9872defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd", 9873 VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; 9874 9875multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr > { 9876def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src), 9877 !strconcat(OpcodeStr##Vec.Suffix, "\t{$src, $dst|$dst, $src}"), 9878 [(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))]>, 9879 EVEX, Sched<[WriteMove]>; // TODO - WriteVecTrunc? 9880 9881// Also need a pattern for anyextend. 9882def : Pat<(Vec.VT (anyext Vec.KRC:$src)), 9883 (!cast<Instruction>(NAME#"rr") Vec.KRC:$src)>; 9884} 9885 9886multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo, 9887 string OpcodeStr, Predicate prd> { 9888let Predicates = [prd] in 9889 defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr>, EVEX_V512; 9890 9891 let Predicates = [prd, HasVLX] in { 9892 defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr>, EVEX_V256; 9893 defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr>, EVEX_V128; 9894 } 9895} 9896 9897defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2" , HasBWI>; 9898defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI> , VEX_W; 9899defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>; 9900defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI> , VEX_W; 9901 9902multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr > { 9903 def rr : AVX512XS8I<opc, MRMSrcReg, 
(outs _.KRC:$dst), (ins _.RC:$src), 9904 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 9905 [(set _.KRC:$dst, (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src)))]>, 9906 EVEX, Sched<[WriteMove]>; 9907} 9908 9909// Use 512bit version to implement 128/256 bit in case NoVLX. 9910multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo, 9911 X86VectorVTInfo _, 9912 string Name> { 9913 9914 def : Pat<(_.KVT (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src))), 9915 (_.KVT (COPY_TO_REGCLASS 9916 (!cast<Instruction>(Name#"Zrr") 9917 (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)), 9918 _.RC:$src, _.SubRegIdx)), 9919 _.KRC))>; 9920} 9921 9922multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr, 9923 AVX512VLVectorVTInfo VTInfo, Predicate prd> { 9924 let Predicates = [prd] in 9925 defm Z : convert_vector_to_mask_common <opc, VTInfo.info512, OpcodeStr>, 9926 EVEX_V512; 9927 9928 let Predicates = [prd, HasVLX] in { 9929 defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256, OpcodeStr>, 9930 EVEX_V256; 9931 defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>, 9932 EVEX_V128; 9933 } 9934 let Predicates = [prd, NoVLX] in { 9935 defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info256, NAME>; 9936 defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info128, NAME>; 9937 } 9938} 9939 9940defm VPMOVB2M : avx512_convert_vector_to_mask<0x29, "vpmovb2m", 9941 avx512vl_i8_info, HasBWI>; 9942defm VPMOVW2M : avx512_convert_vector_to_mask<0x29, "vpmovw2m", 9943 avx512vl_i16_info, HasBWI>, VEX_W; 9944defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m", 9945 avx512vl_i32_info, HasDQI>; 9946defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m", 9947 avx512vl_i64_info, HasDQI>, VEX_W; 9948 9949// Patterns for handling sext from a mask register to v16i8/v16i16 when DQI 9950// is available, but BWI is not. 
We can't handle this in lowering because 9951// a target independent DAG combine likes to combine sext and trunc. 9952let Predicates = [HasDQI, NoBWI] in { 9953 def : Pat<(v16i8 (sext (v16i1 VK16:$src))), 9954 (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>; 9955 def : Pat<(v16i16 (sext (v16i1 VK16:$src))), 9956 (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>; 9957 9958 def : Pat<(v16i8 (anyext (v16i1 VK16:$src))), 9959 (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>; 9960 def : Pat<(v16i16 (anyext (v16i1 VK16:$src))), 9961 (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>; 9962} 9963 9964let Predicates = [HasDQI, NoBWI, HasVLX] in { 9965 def : Pat<(v8i16 (sext (v8i1 VK8:$src))), 9966 (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>; 9967 9968 def : Pat<(v8i16 (anyext (v8i1 VK8:$src))), 9969 (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>; 9970} 9971 9972//===----------------------------------------------------------------------===// 9973// AVX-512 - COMPRESS and EXPAND 9974// 9975 9976multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _, 9977 string OpcodeStr, X86FoldableSchedWrite sched> { 9978 defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst), 9979 (ins _.RC:$src1), OpcodeStr, "$src1", "$src1", 9980 (null_frag)>, AVX5128IBase, 9981 Sched<[sched]>; 9982 9983 let mayStore = 1, hasSideEffects = 0 in 9984 def mr : AVX5128I<opc, MRMDestMem, (outs), 9985 (ins _.MemOp:$dst, _.RC:$src), 9986 OpcodeStr # "\t{$src, $dst|$dst, $src}", 9987 []>, EVEX_CD8<_.EltSize, CD8VT1>, 9988 Sched<[sched.Folded]>; 9989 9990 def mrk : AVX5128I<opc, MRMDestMem, (outs), 9991 (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src), 9992 OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", 9993 []>, 9994 EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>, 9995 Sched<[sched.Folded]>; 9996} 9997 9998multiclass compress_by_vec_width_lowering<X86VectorVTInfo _, string Name> { 9999 def : Pat<(X86mCompressingStore (_.VT _.RC:$src), addr:$dst, _.KRCWM:$mask), 10000 
(!cast<Instruction>(Name#_.ZSuffix##mrk) 10001 addr:$dst, _.KRCWM:$mask, _.RC:$src)>; 10002 10003 def : Pat<(X86compress (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask), 10004 (!cast<Instruction>(Name#_.ZSuffix##rrk) 10005 _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>; 10006 def : Pat<(X86compress (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask), 10007 (!cast<Instruction>(Name#_.ZSuffix##rrkz) 10008 _.KRCWM:$mask, _.RC:$src)>; 10009} 10010 10011multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr, 10012 X86FoldableSchedWrite sched, 10013 AVX512VLVectorVTInfo VTInfo, 10014 Predicate Pred = HasAVX512> { 10015 let Predicates = [Pred] in 10016 defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr, sched>, 10017 compress_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512; 10018 10019 let Predicates = [Pred, HasVLX] in { 10020 defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr, sched>, 10021 compress_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256; 10022 defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr, sched>, 10023 compress_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128; 10024 } 10025} 10026 10027// FIXME: Is there a better scheduler class for VPCOMPRESS? 
// COMPRESS/EXPAND instantiations and the unary immediate-operand FP helper
// multiclasses:
//  - expand_by_vec_width defines the maskable rr and rm forms with null_frag;
//    expand_by_vec_width_lowering supplies the patterns: X86mExpandingLoad with
//    an undef or all-zeros pass-through selects rmkz, a register pass-through
//    selects rmk, and X86expand selects rrk/rrkz.
//  - avx512_unary_fp_packed_imm defines rri/rmi/rmbi (register source,
//    full-vector load source, and broadcast scalar load source; the broadcast
//    form adds EVEX_B).
//  - avx512_unary_fp_sae_packed_imm defines the register-only rrib form whose
//    assembly string carries "{sae}" and which sets EVEX_B.
//  - avx512_common_unary_fp_sae_packed_imm instantiates Z with both of the
//    above, and Z128/Z256 (under HasVLX) without the {sae} form.
10028defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", WriteVarShuffle256, 10029 avx512vl_i32_info>, EVEX, NotMemoryFoldable; 10030defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", WriteVarShuffle256, 10031 avx512vl_i64_info>, EVEX, VEX_W, NotMemoryFoldable; 10032defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", WriteVarShuffle256, 10033 avx512vl_f32_info>, EVEX, NotMemoryFoldable; 10034defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", WriteVarShuffle256, 10035 avx512vl_f64_info>, EVEX, VEX_W, NotMemoryFoldable; 10036 10037// EXPAND (register and memory source forms) 10038multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _, 10039 string OpcodeStr, X86FoldableSchedWrite sched> { 10040 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 10041 (ins _.RC:$src1), OpcodeStr, "$src1", "$src1", 10042 (null_frag)>, AVX5128IBase, 10043 Sched<[sched]>; 10044 10045 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 10046 (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1", 10047 (null_frag)>, 10048 AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>, 10049 Sched<[sched.Folded, sched.ReadAfterFold]>; 10050} 10051 10052multiclass expand_by_vec_width_lowering<X86VectorVTInfo _, string Name> { 10053 10054 def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)), 10055 (!cast<Instruction>(Name#_.ZSuffix##rmkz) 10056 _.KRCWM:$mask, addr:$src)>; 10057 10058 def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, _.ImmAllZerosV)), 10059 (!cast<Instruction>(Name#_.ZSuffix##rmkz) 10060 _.KRCWM:$mask, addr:$src)>; 10061 10062 def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, 10063 (_.VT _.RC:$src0))), 10064 (!cast<Instruction>(Name#_.ZSuffix##rmk) 10065 _.RC:$src0, _.KRCWM:$mask, addr:$src)>; 10066 10067 def : Pat<(X86expand (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask), 10068 (!cast<Instruction>(Name#_.ZSuffix##rrk) 10069 _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>; 10070 def : Pat<(X86expand (_.VT _.RC:$src), _.ImmAllZerosV, 
_.KRCWM:$mask), 10071 (!cast<Instruction>(Name#_.ZSuffix##rrkz) 10072 _.KRCWM:$mask, _.RC:$src)>; 10073} 10074 10075multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr, 10076 X86FoldableSchedWrite sched, 10077 AVX512VLVectorVTInfo VTInfo, 10078 Predicate Pred = HasAVX512> { 10079 let Predicates = [Pred] in 10080 defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr, sched>, 10081 expand_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512; 10082 10083 let Predicates = [Pred, HasVLX] in { 10084 defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr, sched>, 10085 expand_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256; 10086 defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr, sched>, 10087 expand_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128; 10088 } 10089} 10090 10091// FIXME: Is there a better scheduler class for VPEXPAND? 10092defm VPEXPANDD : expand_by_elt_width <0x89, "vpexpandd", WriteVarShuffle256, 10093 avx512vl_i32_info>, EVEX; 10094defm VPEXPANDQ : expand_by_elt_width <0x89, "vpexpandq", WriteVarShuffle256, 10095 avx512vl_i64_info>, EVEX, VEX_W; 10096defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", WriteVarShuffle256, 10097 avx512vl_f32_info>, EVEX; 10098defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", WriteVarShuffle256, 10099 avx512vl_f64_info>, EVEX, VEX_W; 10100 10101//handle instruction reg_vec1 = op(reg_vec,imm) 10102// op(mem_vec,imm) 10103// op(broadcast(eltVt),imm) 10104//all instructions are created with FROUND_CURRENT 10105multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode, 10106 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 10107 let ExeDomain = _.ExeDomain in { 10108 defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 10109 (ins _.RC:$src1, i32u8imm:$src2), 10110 OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2", 10111 (OpNode (_.VT _.RC:$src1), 10112 (i32 imm:$src2))>, Sched<[sched]>; 10113 defm rmi : AVX512_maskable<opc, 
MRMSrcMem, _, (outs _.RC:$dst), 10114 (ins _.MemOp:$src1, i32u8imm:$src2), 10115 OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2", 10116 (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))), 10117 (i32 imm:$src2))>, 10118 Sched<[sched.Folded, sched.ReadAfterFold]>; 10119 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 10120 (ins _.ScalarMemOp:$src1, i32u8imm:$src2), 10121 OpcodeStr##_.Suffix, "$src2, ${src1}"##_.BroadcastStr, 10122 "${src1}"##_.BroadcastStr##", $src2", 10123 (OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src1))), 10124 (i32 imm:$src2))>, EVEX_B, 10125 Sched<[sched.Folded, sched.ReadAfterFold]>; 10126 } 10127} 10128 10129//handle instruction reg_vec1 = op(reg_vec2,imm),{sae} 10130multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr, 10131 SDNode OpNode, X86FoldableSchedWrite sched, 10132 X86VectorVTInfo _> { 10133 let ExeDomain = _.ExeDomain in 10134 defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 10135 (ins _.RC:$src1, i32u8imm:$src2), 10136 OpcodeStr##_.Suffix, "$src2, {sae}, $src1", 10137 "$src1, {sae}, $src2", 10138 (OpNode (_.VT _.RC:$src1), 10139 (i32 imm:$src2))>, 10140 EVEX_B, Sched<[sched]>; 10141} 10142 10143multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr, 10144 AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode, 10145 SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{ 10146 let Predicates = [prd] in { 10147 defm Z : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM, 10148 _.info512>, 10149 avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE, 10150 sched.ZMM, _.info512>, EVEX_V512; 10151 } 10152 let Predicates = [prd, HasVLX] in { 10153 defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM, 10154 _.info128>, EVEX_V128; 10155 defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM, 10156 _.info256>, EVEX_V256; 10157 } 10158} 10159 10160//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm) 
10161// op(reg_vec2,mem_vec,imm) 10162// op(reg_vec2,broadcast(eltVt),imm) 10163//all instruction created with FROUND_CURRENT 10164multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode, 10165 X86FoldableSchedWrite sched, X86VectorVTInfo _>{ 10166 let ExeDomain = _.ExeDomain in { 10167 defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 10168 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), 10169 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", 10170 (OpNode (_.VT _.RC:$src1), 10171 (_.VT _.RC:$src2), 10172 (i32 imm:$src3))>, 10173 Sched<[sched]>; 10174 defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 10175 (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3), 10176 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", 10177 (OpNode (_.VT _.RC:$src1), 10178 (_.VT (bitconvert (_.LdFrag addr:$src2))), 10179 (i32 imm:$src3))>, 10180 Sched<[sched.Folded, sched.ReadAfterFold]>; 10181 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 10182 (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3), 10183 OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1", 10184 "$src1, ${src2}"##_.BroadcastStr##", $src3", 10185 (OpNode (_.VT _.RC:$src1), 10186 (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))), 10187 (i32 imm:$src3))>, EVEX_B, 10188 Sched<[sched.Folded, sched.ReadAfterFold]>; 10189 } 10190} 10191 10192//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm) 10193// op(reg_vec2,mem_vec,imm) 10194multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode, 10195 X86FoldableSchedWrite sched, X86VectorVTInfo DestInfo, 10196 X86VectorVTInfo SrcInfo>{ 10197 let ExeDomain = DestInfo.ExeDomain in { 10198 defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst), 10199 (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3), 10200 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", 10201 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1), 10202 (SrcInfo.VT SrcInfo.RC:$src2), 
10203 (i8 imm:$src3)))>, 10204 Sched<[sched]>; 10205 defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst), 10206 (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3), 10207 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", 10208 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1), 10209 (SrcInfo.VT (bitconvert 10210 (SrcInfo.LdFrag addr:$src2))), 10211 (i8 imm:$src3)))>, 10212 Sched<[sched.Folded, sched.ReadAfterFold]>; 10213 } 10214} 10215 10216//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm) 10217// op(reg_vec2,mem_vec,imm) 10218// op(reg_vec2,broadcast(eltVt),imm) 10219multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode, 10220 X86FoldableSchedWrite sched, X86VectorVTInfo _>: 10221 avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, sched, _, _>{ 10222 10223 let ExeDomain = _.ExeDomain in 10224 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 10225 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3), 10226 OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1", 10227 "$src1, ${src2}"##_.BroadcastStr##", $src3", 10228 (OpNode (_.VT _.RC:$src1), 10229 (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))), 10230 (i8 imm:$src3))>, EVEX_B, 10231 Sched<[sched.Folded, sched.ReadAfterFold]>; 10232} 10233 10234//handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm) 10235// op(reg_vec2,mem_scalar,imm) 10236multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode, 10237 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 10238 let ExeDomain = _.ExeDomain in { 10239 defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 10240 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), 10241 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", 10242 (OpNode (_.VT _.RC:$src1), 10243 (_.VT _.RC:$src2), 10244 (i32 imm:$src3))>, 10245 Sched<[sched]>; 10246 defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), 10247 (ins _.RC:$src1, _.ScalarMemOp:$src2, 
i32u8imm:$src3), 10248 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", 10249 (OpNode (_.VT _.RC:$src1), 10250 (_.VT (scalar_to_vector 10251 (_.ScalarLdFrag addr:$src2))), 10252 (i32 imm:$src3))>, 10253 Sched<[sched.Folded, sched.ReadAfterFold]>; 10254 } 10255} 10256 10257//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae} 10258multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr, 10259 SDNode OpNode, X86FoldableSchedWrite sched, 10260 X86VectorVTInfo _> { 10261 let ExeDomain = _.ExeDomain in 10262 defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 10263 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), 10264 OpcodeStr, "$src3, {sae}, $src2, $src1", 10265 "$src1, $src2, {sae}, $src3", 10266 (OpNode (_.VT _.RC:$src1), 10267 (_.VT _.RC:$src2), 10268 (i32 imm:$src3))>, 10269 EVEX_B, Sched<[sched]>; 10270} 10271 10272//handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae} 10273multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode, 10274 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 10275 let ExeDomain = _.ExeDomain in 10276 defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 10277 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), 10278 OpcodeStr, "$src3, {sae}, $src2, $src1", 10279 "$src1, $src2, {sae}, $src3", 10280 (OpNode (_.VT _.RC:$src1), 10281 (_.VT _.RC:$src2), 10282 (i32 imm:$src3))>, 10283 EVEX_B, Sched<[sched]>; 10284} 10285 10286multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr, 10287 AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode, 10288 SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{ 10289 let Predicates = [prd] in { 10290 defm Z : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, 10291 avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE, sched.ZMM, _.info512>, 10292 EVEX_V512; 10293 10294 } 10295 let Predicates = [prd, HasVLX] in { 10296 defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM, 
_.info128>, 10297 EVEX_V128; 10298 defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, 10299 EVEX_V256; 10300 } 10301} 10302 10303multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr, 10304 X86SchedWriteWidths sched, AVX512VLVectorVTInfo DestInfo, 10305 AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> { 10306 let Predicates = [Pred] in { 10307 defm Z : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.ZMM, DestInfo.info512, 10308 SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX_4V; 10309 } 10310 let Predicates = [Pred, HasVLX] in { 10311 defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.XMM, DestInfo.info128, 10312 SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX_4V; 10313 defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.YMM, DestInfo.info256, 10314 SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX_4V; 10315 } 10316} 10317 10318multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _, 10319 bits<8> opc, SDNode OpNode, X86SchedWriteWidths sched, 10320 Predicate Pred = HasAVX512> { 10321 let Predicates = [Pred] in { 10322 defm Z : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, 10323 EVEX_V512; 10324 } 10325 let Predicates = [Pred, HasVLX] in { 10326 defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, 10327 EVEX_V128; 10328 defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, 10329 EVEX_V256; 10330 } 10331} 10332 10333multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr, 10334 X86VectorVTInfo _, bits<8> opc, SDNode OpNode, 10335 SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd> { 10336 let Predicates = [prd] in { 10337 defm Z : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, sched.XMM, _>, 10338 avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeSAE, sched.XMM, _>; 10339 } 10340} 10341 10342multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr, 10343 bits<8> opcPs, bits<8> opcPd, SDNode OpNode, 
10344 SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{ 10345 defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info, 10346 opcPs, OpNode, OpNodeSAE, sched, prd>, 10347 EVEX_CD8<32, CD8VF>; 10348 defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info, 10349 opcPd, OpNode, OpNodeSAE, sched, prd>, 10350 EVEX_CD8<64, CD8VF>, VEX_W; 10351} 10352 10353defm VREDUCE : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56, 10354 X86VReduce, X86VReduceSAE, SchedWriteFRnd, HasDQI>, 10355 AVX512AIi8Base, EVEX; 10356defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09, 10357 X86VRndScale, X86VRndScaleSAE, SchedWriteFRnd, HasAVX512>, 10358 AVX512AIi8Base, EVEX; 10359defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26, 10360 X86VGetMant, X86VGetMantSAE, SchedWriteFRnd, HasAVX512>, 10361 AVX512AIi8Base, EVEX; 10362 10363defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info, 10364 0x50, X86VRange, X86VRangeSAE, 10365 SchedWriteFAdd, HasDQI>, 10366 AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; 10367defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info, 10368 0x50, X86VRange, X86VRangeSAE, 10369 SchedWriteFAdd, HasDQI>, 10370 AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; 10371 10372defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd", 10373 f64x_info, 0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>, 10374 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W; 10375defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info, 10376 0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>, 10377 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>; 10378 10379defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info, 10380 0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>, 10381 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W; 10382defm VREDUCESS: 
avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info, 10383 0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>, 10384 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>; 10385 10386defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info, 10387 0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>, 10388 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W; 10389defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info, 10390 0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>, 10391 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>; 10392 10393multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr, 10394 X86FoldableSchedWrite sched, 10395 X86VectorVTInfo _, 10396 X86VectorVTInfo CastInfo, 10397 string EVEX2VEXOvrd> { 10398 let ExeDomain = _.ExeDomain in { 10399 defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 10400 (ins _.RC:$src1, _.RC:$src2, u8imm:$src3), 10401 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", 10402 (_.VT (bitconvert 10403 (CastInfo.VT (X86Shuf128 _.RC:$src1, _.RC:$src2, 10404 (i8 imm:$src3)))))>, 10405 Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr">; 10406 defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 10407 (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3), 10408 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", 10409 (_.VT 10410 (bitconvert 10411 (CastInfo.VT (X86Shuf128 _.RC:$src1, 10412 (CastInfo.LdFrag addr:$src2), 10413 (i8 imm:$src3)))))>, 10414 Sched<[sched.Folded, sched.ReadAfterFold]>, 10415 EVEX2VEXOverride<EVEX2VEXOvrd#"rm">; 10416 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 10417 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3), 10418 OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1", 10419 "$src1, ${src2}"##_.BroadcastStr##", $src3", 10420 (_.VT 10421 (bitconvert 10422 (CastInfo.VT 10423 (X86Shuf128 _.RC:$src1, 10424 (X86VBroadcast (_.ScalarLdFrag addr:$src2)), 10425 
(i8 imm:$src3)))))>, EVEX_B, 10426 Sched<[sched.Folded, sched.ReadAfterFold]>; 10427 } 10428} 10429 10430multiclass avx512_shuff_packed_128<string OpcodeStr, X86FoldableSchedWrite sched, 10431 AVX512VLVectorVTInfo _, 10432 AVX512VLVectorVTInfo CastInfo, bits<8> opc, 10433 string EVEX2VEXOvrd>{ 10434 let Predicates = [HasAVX512] in 10435 defm Z : avx512_shuff_packed_128_common<opc, OpcodeStr, sched, 10436 _.info512, CastInfo.info512, "">, EVEX_V512; 10437 10438 let Predicates = [HasAVX512, HasVLX] in 10439 defm Z256 : avx512_shuff_packed_128_common<opc, OpcodeStr, sched, 10440 _.info256, CastInfo.info256, 10441 EVEX2VEXOvrd>, EVEX_V256; 10442} 10443 10444defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", WriteFShuffle256, 10445 avx512vl_f32_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; 10446defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", WriteFShuffle256, 10447 avx512vl_f64_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; 10448defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", WriteFShuffle256, 10449 avx512vl_i32_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; 10450defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", WriteFShuffle256, 10451 avx512vl_i64_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; 10452 10453let Predicates = [HasAVX512] in { 10454// Provide fallback in case the load node that is used in the broadcast 10455// patterns above is used by additional users, which prevents the pattern 10456// selection. 
// Register-source fallbacks for X86SubVBroadcast of a 128-bit vector to a
// 512-bit vector. Both shuffle operands are an undefined 512-bit value with
// the source inserted at sub_xmm, and the shuffle immediate is 0.
def : Pat<(v8f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
          (VSHUFF64X2Zrri
            (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
            (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
            0)>;
def : Pat<(v8i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
          (VSHUFI64X2Zrri
            (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
            (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
            0)>;

def : Pat<(v16f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
          (VSHUFF32X4Zrri
            (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
            (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
            0)>;
def : Pat<(v16i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
          (VSHUFI32X4Zrri
            (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
            (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
            0)>;

// Word and byte element types reuse the 32x4 integer shuffle.
def : Pat<(v32i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
          (VSHUFI32X4Zrri
            (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
            (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
            0)>;

def : Pat<(v64i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
          (VSHUFI32X4Zrri
            (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
            (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
            0)>;
} // end of the enclosing `let Predicates` scope

// VALIGN for a single vector type `_`: register (rri), full-vector memory
// (rmi) and embedded-broadcast memory (rmbi) forms, each taking an 8-bit
// immediate as the last operand.
//
// The rri/rmi forms carry an EVEX2VEXOverride naming VPALIGNR; instantiations
// that must not use it reset the field to unset at the use site.
multiclass avx512_valign<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  let ExeDomain = _.ExeDomain in {
    // reg, reg, imm8
    defm rri : AVX512_maskable<opc, MRMSrcReg, _,
                               (outs _.RC:$dst),
                               (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
                               OpcodeStr,
                               "$src3, $src2, $src1", "$src1, $src2, $src3",
                               (_.VT (X86VAlign _.RC:$src1, _.RC:$src2,
                                                (i8 imm:$src3)))>,
               Sched<[sched]>, EVEX2VEXOverride<"VPALIGNRrri">;

    // reg, mem, imm8
    defm rmi : AVX512_maskable<opc, MRMSrcMem, _,
                               (outs _.RC:$dst),
                               (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
                               OpcodeStr,
                               "$src3, $src2, $src1", "$src1, $src2, $src3",
                               (_.VT (X86VAlign _.RC:$src1,
                                      (bitconvert (_.LdFrag addr:$src2)),
                                      (i8 imm:$src3)))>,
               Sched<[sched.Folded, sched.ReadAfterFold]>,
               EVEX2VEXOverride<"VPALIGNRrmi">;

    // reg, broadcast scalar mem, imm8 (no EVEX2VEXOverride on this form)
    defm rmbi : AVX512_maskable<opc, MRMSrcMem, _,
                                (outs _.RC:$dst),
                                (ins _.RC:$src1, _.ScalarMemOp:$src2,
                                     u8imm:$src3),
                                OpcodeStr,
                                "$src3, ${src2}"##_.BroadcastStr##", $src1",
                                "$src1, ${src2}"##_.BroadcastStr##", $src3",
                                (X86VAlign _.RC:$src1,
                                           (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
                                           (i8 imm:$src3))>,
                EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Instantiates avx512_valign (opcode 0x03) at 512, 128 and 256 bits. The
// 512-bit form needs HasAVX512; the narrower forms additionally need HasVLX.
multiclass avx512_valign_common<string OpcodeStr, X86SchedWriteWidths sched,
                                AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_valign<0x03, OpcodeStr, sched.ZMM, _.info512>,
             AVX512AIi8Base, EVEX_4V, EVEX_V512;
  }
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_valign<0x03, OpcodeStr, sched.XMM, _.info128>,
                AVX512AIi8Base, EVEX_4V, EVEX_V128;
    // The 256-bit version cannot really be overridden, so put the
    // EVEX2VEXOverride field back to unset for it.
    let EVEX2VEXOverride = ? in {
      defm Z256 : avx512_valign<0x03, OpcodeStr, sched.YMM, _.info256>,
                  AVX512AIi8Base, EVEX_4V, EVEX_V256;
    }
  }
}

defm VALIGND: avx512_valign_common<"valignd", SchedWriteShuffle,
                                   avx512vl_i32_info>,
              EVEX_CD8<32, CD8VF>;
defm VALIGNQ: avx512_valign_common<"valignq", SchedWriteShuffle,
                                   avx512vl_i64_info>,
              EVEX_CD8<64, CD8VF>, VEX_W;

defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr",
                                         SchedWriteShuffle, avx512vl_i8_info,
                                         avx512vl_i8_info>,
               EVEX_CD8<8, CD8VF>;

// Immediate transforms used when a valignq is re-expressed as a masked
// valignd, or a valignq/valignd as a vpalignr. Each one multiplies the
// immediate by a fixed factor: x2, x8 and x4 respectively.
def ValignqImm32XForm : SDNodeXForm<imm, [{
  return getI8Imm(N->getZExtValue() * 2, SDLoc(N));
}]>;
def ValignqImm8XForm : SDNodeXForm<imm, [{
  return getI8Imm(N->getZExtValue() * 8, SDLoc(N));
}]>;
def ValigndImm8XForm : SDNodeXForm<imm, [{
  return getI8Imm(N->getZExtValue() * 4, SDLoc(N));
}]>;

// Selects a masked instruction of type `To` for a vselect whose true operand
// is a bitconvert of `OpNode` computed in type `From`. `OpcodeStr` is the
// instruction name prefix; `ImmXForm` rewrites the immediate for the `To`
// instruction. Four patterns: register / load second source, each with
// merge-masking ($src0 passthru) and zero-masking.
multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
                                        X86VectorVTInfo From, X86VectorVTInfo To,
                                        SDNodeXForm ImmXForm> {
  // reg, reg - merge masking
  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                            (bitconvert
                             (From.VT (OpNode From.RC:$src1, From.RC:$src2,
                                              imm:$src3))),
                            To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rrik")
               To.RC:$src0, To.KRCWM:$mask,
               To.RC:$src1, To.RC:$src2,
               (ImmXForm imm:$src3))>;

  // reg, reg - zero masking
  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                            (bitconvert
                             (From.VT (OpNode From.RC:$src1, From.RC:$src2,
                                              imm:$src3))),
                            To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rrikz")
               To.KRCWM:$mask,
               To.RC:$src1, To.RC:$src2,
               (ImmXForm imm:$src3))>;

  // reg, load - merge masking
  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                            (bitconvert
                             (From.VT (OpNode From.RC:$src1,
                                              (From.LdFrag addr:$src2),
                                      imm:$src3))),
                            To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rmik")
               To.RC:$src0, To.KRCWM:$mask,
               To.RC:$src1, addr:$src2,
               (ImmXForm imm:$src3))>;

  // reg, load - zero masking
  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                            (bitconvert
                             (From.VT (OpNode From.RC:$src1,
                                              (From.LdFrag addr:$src2),
                                      imm:$src3))),
                            To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rmikz")
               To.KRCWM:$mask,
               To.RC:$src1, addr:$src2,
               (ImmXForm imm:$src3))>;
}

// Everything in avx512_vpalign_mask_lowering, plus three patterns whose second
// source is a bitconvert of a `To`-typed broadcast load: unmasked (rmbi),
// merge-masked (rmbik) and zero-masked (rmbikz).
multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
                                           X86VectorVTInfo From,
                                           X86VectorVTInfo To,
                                           SDNodeXForm ImmXForm> :
      avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> {
  // broadcast - unmasked
  def : Pat<(From.VT (OpNode From.RC:$src1,
                             (bitconvert (To.VT (X86VBroadcast
                                                 (To.ScalarLdFrag addr:$src2)))),
                             imm:$src3)),
            (!cast<Instruction>(OpcodeStr#"rmbi")
               To.RC:$src1, addr:$src2,
               (ImmXForm imm:$src3))>;

  // broadcast - merge masking
  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                            (bitconvert
                             (From.VT (OpNode From.RC:$src1,
                                      (bitconvert
                                       (To.VT (X86VBroadcast
                                               (To.ScalarLdFrag addr:$src2)))),
                                      imm:$src3))),
                            To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rmbik")
               To.RC:$src0, To.KRCWM:$mask,
               To.RC:$src1, addr:$src2,
               (ImmXForm imm:$src3))>;

  // broadcast - zero masking
  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                            (bitconvert
                             (From.VT (OpNode From.RC:$src1,
                                      (bitconvert
                                       (To.VT (X86VBroadcast
                                               (To.ScalarLdFrag addr:$src2)))),
                                      imm:$src3))),
                            To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rmbikz")
               To.KRCWM:$mask,
               To.RC:$src1, addr:$src2,
               (ImmXForm imm:$src3))>;
}

let Predicates = [HasAVX512] in {
  // At 512 bits the lowering already picks the widest usable element type, so
  // the only conversion required is valignq -> valignd.
defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ", X86VAlign,
                                       v8i64_info, v16i32_info,
                                       ValignqImm32XForm>;
}

let Predicates = [HasVLX] in {
  // At 128 bits the lowering already picks the widest usable element type, so
  // the only conversion required is valignq -> valignd.
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ128", X86VAlign,
                                         v2i64x_info, v4i32x_info,
                                         ValignqImm32XForm>;
  // Same reasoning at 256 bits.
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ256", X86VAlign,
                                         v4i64x_info, v8i32x_info,
                                         ValignqImm32XForm>;
}

let Predicates = [HasVLX, HasBWI] in {
  // VALIGND/VALIGNQ can be turned into VPALIGNR by rescaling the immediate
  // (x8 for qword, x4 for dword elements). Only the 128-bit (Z128) forms are
  // instantiated here.
  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign,
                                      v2i64x_info, v16i8x_info,
                                      ValignqImm8XForm>;
  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign,
                                      v4i32x_info, v16i8x_info,
                                      ValigndImm8XForm>;
}

defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw",
                                          SchedWritePSADBW,
                                          avx512vl_i16_info, avx512vl_i8_info>,
                EVEX_CD8<8, CD8VF>, NotEVEX2VEXConvertible;

// One-operand maskable instruction for vector type `_`: a register form (rr)
// and a full-vector memory form (rm), both matching `OpNode`.
multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    defm rr : AVX512_maskable<opc, MRMSrcReg, _,
                              (outs _.RC:$dst),
                              (ins _.RC:$src1),
                              OpcodeStr, "$src1", "$src1",
                              (_.VT (OpNode _.RC:$src1))>,
              EVEX, AVX5128IBase, Sched<[sched]>;

    defm rm : AVX512_maskable<opc, MRMSrcMem, _,
                              (outs _.RC:$dst),
                              (ins _.MemOp:$src1),
                              OpcodeStr, "$src1", "$src1",
                              (_.VT (OpNode (bitconvert (_.LdFrag addr:$src1))))>,
              EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>,
              Sched<[sched.Folded]>;
  }
}

// avx512_unary_rm plus an embedded-broadcast memory form (rmb) whose source is
// a scalar load fed through X86VBroadcast.
multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _> :
           avx512_unary_rm<opc, OpcodeStr, OpNode, sched, _> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _,
                             (outs _.RC:$dst),
                             (ins _.ScalarMemOp:$src1),
                             OpcodeStr,
                             "${src1}"##_.BroadcastStr,
                             "${src1}"##_.BroadcastStr,
                             (_.VT (OpNode (X86VBroadcast
                                               (_.ScalarLdFrag addr:$src1))))>,
             EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[sched.Folded]>;
}

// avx512_unary_rm at all three vector lengths. The 512-bit form is guarded by
// `prd`; the 256/128-bit forms by `prd` and HasVLX.
multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86SchedWriteWidths sched,
                              AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.ZMM,
                             VTInfo.info512>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.YMM,
                                VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.XMM,
                                VTInfo.info128>,
                EVEX_V128;
  }
}

// Same as avx512_unary_rm_vl, but built on avx512_unary_rmb so every vector
// length also gets the broadcast form.
multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86SchedWriteWidths sched,
                               AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.ZMM,
                              VTInfo.info512>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.YMM,
                                 VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.XMM,
                                 VTInfo.info128>,
                EVEX_V128;
  }
}

// Qword ("q" suffix, VEX_W) and dword ("d" suffix) variants, both with
// broadcast forms.
multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd> {
  defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, sched,
                               avx512vl_i64_info, prd>,
           VEX_W;
  defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, sched,
                               avx512vl_i32_info, prd>;
}

// Word ("w" suffix) and byte ("b" suffix) variants; these use
// avx512_unary_rm_vl and therefore have no broadcast form.
multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd> {
  defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, sched,
                              avx512vl_i16_info, prd>,
           VEX_WIG;
  defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, sched,
                              avx512vl_i8_info, prd>,
           VEX_WIG;
}

// All four element widths: d/q under HasAVX512, b/w under HasBWI.
multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
                                  bits<8> opc_d, bits<8> opc_q,
                                  string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched> {
  defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, sched,
                                    HasAVX512>,
              avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, sched,
                                    HasBWI>;
}

defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs,
                                    SchedWriteVecALU>;

// VPABS without VLX: run the 512-bit VPABSQ on a widened operand and extract
// the 256/128-bit result.
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v4i64 (abs VR256X:$src)),
            (EXTRACT_SUBREG
               (VPABSQZrr
                  (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
               sub_ymm)>;
  def : Pat<(v2i64 (abs VR128X:$src)),
            (EXTRACT_SUBREG
               (VPABSQZrr
                  (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
               sub_xmm)>;
}

// Use 512bit version to implement 128/256 bit.
// Patterns that implement a 256-bit or 128-bit unary `OpNode` with the 512-bit
// register form `InstrStr # "Zrr"` when VLX is unavailable (`prd` and NoVLX).
// The operand is inserted into an undefined 512-bit value at the narrow type's
// SubRegIdx and the result is extracted from the same sub-register.
multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
                                 AVX512VLVectorVTInfo _, Predicate prd> {
  let Predicates = [prd, NoVLX] in {
    // 256-bit operand.
    def : Pat<(_.info256.VT(OpNode _.info256.RC:$src1)),
              (EXTRACT_SUBREG
                 (!cast<Instruction>(InstrStr # "Zrr")
                    (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
                                   _.info256.RC:$src1,
                                   _.info256.SubRegIdx)),
                 _.info256.SubRegIdx)>;

    // 128-bit operand.
    def : Pat<(_.info128.VT(OpNode _.info128.RC:$src1)),
              (EXTRACT_SUBREG
                 (!cast<Instruction>(InstrStr # "Zrr")
                    (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
                                   _.info128.RC:$src1,
                                   _.info128.SubRegIdx)),
                 _.info128.SubRegIdx)>;
  }
}

defm VPLZCNT : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz,
                                     SchedWriteVecIMul, HasCDI>;

// FIXME: Is there a better scheduler class for VPCONFLICT?
defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict,
                                        SchedWriteVecALU, HasCDI>;

// VPLZCNT without VLX: use the 512-bit instruction for 128/256-bit operands.
defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>;
defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>;

//===---------------------------------------------------------------------===//
// Counts number of ones - VPOPCNTD and VPOPCNTQ
//===---------------------------------------------------------------------===//

// FIXME: Is there a better scheduler class for VPOPCNTD/VPOPCNTQ?
defm VPOPCNT : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop,
                                     SchedWriteVecALU, HasVPOPCNTDQ>;

// VPOPCNT without VLX: use the 512-bit instruction for 128/256-bit operands.
defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info, HasVPOPCNTDQ>;
defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info, HasVPOPCNTDQ>;

//===---------------------------------------------------------------------===//
// Replicate Single FP - MOVSHDUP and MOVSLDUP
//===---------------------------------------------------------------------===//

// f32 unary instruction at all vector lengths, XS-prefixed, under HasAVX512.
multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86SchedWriteWidths sched> {
  defm NAME: avx512_unary_rm_vl<opc, OpcodeStr, OpNode, sched,
                                avx512vl_f32_info, HasAVX512>,
             XS;
}

defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup,
                                  SchedWriteFShuffle>;
defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup,
                                  SchedWriteFShuffle>;

//===----------------------------------------------------------------------===//
// AVX-512 - MOVDDUP
//===----------------------------------------------------------------------===//

// 128-bit MOVDDUP. The memory form takes a scalar memory operand and matches
// `OpNode` applied to a scalar_to_vector of the scalar load.
multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    defm rr : AVX512_maskable<opc, MRMSrcReg, _,
                              (outs _.RC:$dst),
                              (ins _.RC:$src),
                              OpcodeStr, "$src", "$src",
                              (_.VT (OpNode (_.VT _.RC:$src)))>,
              EVEX, Sched<[sched]>;
    defm rm : AVX512_maskable<opc, MRMSrcMem, _,
                              (outs _.RC:$dst),
                              (ins _.ScalarMemOp:$src),
                              OpcodeStr, "$src", "$src",
                              (_.VT (OpNode (_.VT (scalar_to_vector
                                                    (_.ScalarLdFrag addr:$src)))))>,
              EVEX, EVEX_CD8<_.EltSize, CD8VH>,
              Sched<[sched.Folded]>;
  }
}

// MOVDDUP at all three vector lengths.
// NOTE(review): the `OpNode` parameter is not referenced in this body. Z and
// Z256 name X86Movddup directly and Z128 uses X86VBroadcast.
multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86SchedWriteWidths sched,
                                 AVX512VLVectorVTInfo VTInfo> {
  defm Z : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.ZMM,
                           VTInfo.info512>,
           EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.YMM,
                                VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_movddup_128<opc, OpcodeStr, X86VBroadcast, sched.XMM,
                                   VTInfo.info128>,
                EVEX_V128;
  }
}

// f64 instantiation of avx512_movddup_common, XD-prefixed with VEX_W.
multiclass avx512_movddup<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          X86SchedWriteWidths sched> {
  defm NAME: avx512_movddup_common<opc, OpcodeStr, OpNode, sched,
                                   avx512vl_f64_info>,
             XD, VEX_W;
}

defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup, SchedWriteFShuffle>;

// Extra v2f64 X86VBroadcast patterns selecting the 128-bit VMOVDDUP forms.
let Predicates = [HasVLX] in {
  // Unmasked: scalar load, scalar register, full vector load, zero-extending
  // 64-bit load.
  def : Pat<(v2f64 (X86VBroadcast (loadf64 addr:$src))),
            (VMOVDDUPZ128rm addr:$src)>;
  def : Pat<(v2f64 (X86VBroadcast f64:$src)),
            (VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
  def : Pat<(v2f64 (X86VBroadcast (v2f64 (nonvolatile_load addr:$src)))),
            (VMOVDDUPZ128rm addr:$src)>;
  def : Pat<(v2f64 (X86VBroadcast (v2f64 (X86vzload64 addr:$src)))),
            (VMOVDDUPZ128rm addr:$src)>;

  // Masked, scalar register source.
  def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
                     (v2f64 VR128X:$src0)),
            (VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
                             (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
  def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
                     immAllZerosV),
            (VMOVDDUPZ128rrkz VK2WM:$mask,
                              (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;

  // Masked, scalar load source.
  def : Pat<(vselect (v2i1 VK2WM:$mask),
                     (v2f64 (X86VBroadcast (loadf64 addr:$src))),
                     (v2f64 VR128X:$src0)),
            (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(vselect (v2i1 VK2WM:$mask),
                     (v2f64 (X86VBroadcast (loadf64 addr:$src))),
                     immAllZerosV),
            (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;

  // Masked, full vector load source.
  def : Pat<(vselect (v2i1 VK2WM:$mask),
                     (v2f64 (X86VBroadcast (v2f64 (nonvolatile_load addr:$src)))),
                     (v2f64 VR128X:$src0)),
            (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(vselect (v2i1 VK2WM:$mask),
                     (v2f64 (X86VBroadcast (v2f64 (nonvolatile_load addr:$src)))),
                     immAllZerosV),
            (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - Unpack Instructions
//===----------------------------------------------------------------------===//

// Floating-point unpacks.
defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, HasAVX512,
                                 SchedWriteFShuffleSizes, 0, 1>;
defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, HasAVX512,
                                 SchedWriteFShuffleSizes>;

// Byte and word integer unpacks (HasBWI).
defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
                                       SchedWriteShuffle, HasBWI>;
defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh,
                                       SchedWriteShuffle, HasBWI>;
defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl,
                                       SchedWriteShuffle, HasBWI>;
defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh,
                                       SchedWriteShuffle, HasBWI>;

// Dword and qword integer unpacks (HasAVX512).
defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl,
                                       SchedWriteShuffle, HasAVX512>;
defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh,
                                       SchedWriteShuffle, HasAVX512>;
defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl,
                                        SchedWriteShuffle, HasAVX512>;
defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh,
                                        SchedWriteShuffle, HasAVX512>;

//===----------------------------------------------------------------------===//
// AVX-512 - Extract & Insert Integer Instructions
//===----------------------------------------------------------------------===//

// Store form (mr) shared by the byte/word extracts: stores the truncated
// result of `OpNode` on ($src1, $src2) to $dst.
multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   X86VectorVTInfo _> {
  def mr : AVX512Ii8<opc, MRMDestMem,
                     (outs),
                     (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
                     OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     [(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1),
                                                     imm:$src2))),
                             addr:$dst)]>,
           EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecExtractSt]>;
}

// Byte extract: register destination form plus the shared store form
// (opcode 0x14, HasBWI).
multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<0x14, MRMDestReg,
                       (outs GR32orGR64:$dst),
                       (ins _.RC:$src1, u8imm:$src2),
                       OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set GR32orGR64:$dst,
                             (X86pextrb (_.VT _.RC:$src1), imm:$src2))]>,
             EVEX, TAPD, Sched<[WriteVecExtract]>;

    defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD;
  }
}

// Word extract (HasBWI): the selectable register form uses opcode 0xC5
// (MRMSrcReg). A pattern-less, codegen-only `rr_REV` covers the 0x15
// MRMDestReg encoding, and the store form also uses 0x15.
multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<0xC5, MRMSrcReg,
                       (outs GR32orGR64:$dst),
                       (ins _.RC:$src1, u8imm:$src2),
                       OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set GR32orGR64:$dst,
                             (X86pextrw (_.VT _.RC:$src1), imm:$src2))]>,
             EVEX, PD, Sched<[WriteVecExtract]>;

    let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
      def rr_REV : AVX512Ii8<0x15, MRMDestReg,
                             (outs GR32orGR64:$dst),
                             (ins _.RC:$src1, u8imm:$src2),
                             OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                             []>,
                   EVEX, TAPD, FoldGenData<NAME#rr>,
                   Sched<[WriteVecExtract]>;
    }

    defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD;
  }
}

// Dword/qword extract into general register class `GRC`, with register and
// store forms (opcode 0x16, HasDQI).
multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
                                 RegisterClass GRC> {
  let Predicates = [HasDQI] in {
    def rr : AVX512Ii8<0x16, MRMDestReg,
                       (outs GRC:$dst),
                       (ins _.RC:$src1, u8imm:$src2),
                       OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set GRC:$dst,
                             (extractelt (_.VT _.RC:$src1), imm:$src2))]>,
             EVEX, TAPD, Sched<[WriteVecExtract]>;

    def mr : AVX512Ii8<0x16, MRMDestMem,
                       (outs),
                       (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
                       OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(store (extractelt (_.VT _.RC:$src1),
                                           imm:$src2),addr:$dst)]>,
             EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD,
             Sched<[WriteVecExtractSt]>;
  }
}

defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>, VEX_WIG;
defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>, VEX_WIG;
defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, VEX_W;

// Load form (rm) shared by all element inserts: `OpNode` on $src1, the value
// loaded through `LdFrag`, and the immediate.
multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86VectorVTInfo _, PatFrag LdFrag> {
  def rm : AVX512Ii8<opc, MRMSrcMem,
                     (outs _.RC:$dst),
                     (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                     OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                     [(set _.RC:$dst,
                           (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2),
                                         imm:$src3)))]>,
           EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
           Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
}

// Byte/word insert from a GR32orGR64 register, plus the shared load form
// (HasBWI).
multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86VectorVTInfo _, PatFrag LdFrag> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<opc, MRMSrcReg,
                       (outs _.RC:$dst),
                       (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
                       OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                       [(set _.RC:$dst,
                             (OpNode _.RC:$src1, GR32orGR64:$src2, imm:$src3))]>,
             EVEX_4V, Sched<[WriteVecInsert]>;

    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag>;
  }
}

// Dword/qword insert from general register class `GRC` via insertelt, plus
// the shared load form using the type's scalar load fragment (HasDQI).
multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
                                X86VectorVTInfo _, RegisterClass GRC> {
  let Predicates = [HasDQI] in {
    def rr : AVX512Ii8<opc, MRMSrcReg,
                       (outs _.RC:$dst),
                       (ins _.RC:$src1, GRC:$src2, u8imm:$src3),
                       OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                       [(set _.RC:$dst,
                             (_.VT (insertelt _.RC:$src1, GRC:$src2,
                                              imm:$src3)))]>,
             EVEX_4V, TAPD, Sched<[WriteVecInsert]>;

    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
                                    _.ScalarLdFrag>,
                TAPD;
  }
}

defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info,
                                     extloadi8>,
                TAPD, VEX_WIG;
defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info,
                                     extloadi16>,
                PD, VEX_WIG;
defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, VEX_W;

//===----------------------------------------------------------------------===//
// VSHUFPS - VSHUFPD Operations
//===----------------------------------------------------------------------===//

// VSHUFPS/VSHUFPD (opcode 0xC6) for the FP type family `VTInfo_FP`.
// NOTE(review): `VTInfo_I` is not referenced in this body.
multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_I,
                        AVX512VLVectorVTInfo VTInfo_FP>{
  defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp,
                                    SchedWriteFShuffle>,
             EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
             AVX512AIi8Base, EVEX_4V;
}

defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_i32_info, avx512vl_f32_info>, PS;
defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_i64_info, avx512vl_f64_info>, PD, VEX_W;

//===----------------------------------------------------------------------===//
// AVX-512 - Byte shift Left/Right
//===----------------------------------------------------------------------===//

// FIXME: The SSE/AVX names are PSLLDQri etc. - should we add the i here as well?
11067multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr, 11068 Format MRMm, string OpcodeStr, 11069 X86FoldableSchedWrite sched, X86VectorVTInfo _>{ 11070 def rr : AVX512<opc, MRMr, 11071 (outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2), 11072 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 11073 [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 imm:$src2))))]>, 11074 Sched<[sched]>; 11075 def rm : AVX512<opc, MRMm, 11076 (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2), 11077 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 11078 [(set _.RC:$dst,(_.VT (OpNode 11079 (_.VT (bitconvert (_.LdFrag addr:$src1))), 11080 (i8 imm:$src2))))]>, 11081 Sched<[sched.Folded, sched.ReadAfterFold]>; 11082} 11083 11084multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr, 11085 Format MRMm, string OpcodeStr, 11086 X86SchedWriteWidths sched, Predicate prd>{ 11087 let Predicates = [prd] in 11088 defm Z : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr, 11089 sched.ZMM, v64i8_info>, EVEX_V512; 11090 let Predicates = [prd, HasVLX] in { 11091 defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr, 11092 sched.YMM, v32i8x_info>, EVEX_V256; 11093 defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr, 11094 sched.XMM, v16i8x_info>, EVEX_V128; 11095 } 11096} 11097defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq", 11098 SchedWriteShuffle, HasBWI>, 11099 AVX512PDIi8Base, EVEX_4V, VEX_WIG; 11100defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq", 11101 SchedWriteShuffle, HasBWI>, 11102 AVX512PDIi8Base, EVEX_4V, VEX_WIG; 11103 11104multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode, 11105 string OpcodeStr, X86FoldableSchedWrite sched, 11106 X86VectorVTInfo _dst, X86VectorVTInfo _src> { 11107 def rr : AVX512BI<opc, MRMSrcReg, 11108 (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.RC:$src2), 11109 !strconcat(OpcodeStr, 
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 11110 [(set _dst.RC:$dst,(_dst.VT 11111 (OpNode (_src.VT _src.RC:$src1), 11112 (_src.VT _src.RC:$src2))))]>, 11113 Sched<[sched]>; 11114 def rm : AVX512BI<opc, MRMSrcMem, 11115 (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.MemOp:$src2), 11116 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 11117 [(set _dst.RC:$dst,(_dst.VT 11118 (OpNode (_src.VT _src.RC:$src1), 11119 (_src.VT (bitconvert 11120 (_src.LdFrag addr:$src2))))))]>, 11121 Sched<[sched.Folded, sched.ReadAfterFold]>; 11122} 11123 11124multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode, 11125 string OpcodeStr, X86SchedWriteWidths sched, 11126 Predicate prd> { 11127 let Predicates = [prd] in 11128 defm Z : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.ZMM, 11129 v8i64_info, v64i8_info>, EVEX_V512; 11130 let Predicates = [prd, HasVLX] in { 11131 defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.YMM, 11132 v4i64x_info, v32i8x_info>, EVEX_V256; 11133 defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.XMM, 11134 v2i64x_info, v16i8x_info>, EVEX_V128; 11135 } 11136} 11137 11138defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw", 11139 SchedWritePSADBW, HasBWI>, EVEX_4V, VEX_WIG; 11140 11141// Transforms to swizzle an immediate to enable better matching when 11142// memory operand isn't in the right place. 11143def VPTERNLOG321_imm8 : SDNodeXForm<imm, [{ 11144 // Convert a VPTERNLOG immediate by swapping operand 0 and operand 2. 11145 uint8_t Imm = N->getZExtValue(); 11146 // Swap bits 1/4 and 3/6. 11147 uint8_t NewImm = Imm & 0xa5; 11148 if (Imm & 0x02) NewImm |= 0x10; 11149 if (Imm & 0x10) NewImm |= 0x02; 11150 if (Imm & 0x08) NewImm |= 0x40; 11151 if (Imm & 0x40) NewImm |= 0x08; 11152 return getI8Imm(NewImm, SDLoc(N)); 11153}]>; 11154def VPTERNLOG213_imm8 : SDNodeXForm<imm, [{ 11155 // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2. 
11156 uint8_t Imm = N->getZExtValue(); 11157 // Swap bits 2/4 and 3/5. 11158 uint8_t NewImm = Imm & 0xc3; 11159 if (Imm & 0x04) NewImm |= 0x10; 11160 if (Imm & 0x10) NewImm |= 0x04; 11161 if (Imm & 0x08) NewImm |= 0x20; 11162 if (Imm & 0x20) NewImm |= 0x08; 11163 return getI8Imm(NewImm, SDLoc(N)); 11164}]>; 11165def VPTERNLOG132_imm8 : SDNodeXForm<imm, [{ 11166 // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2. 11167 uint8_t Imm = N->getZExtValue(); 11168 // Swap bits 1/2 and 5/6. 11169 uint8_t NewImm = Imm & 0x99; 11170 if (Imm & 0x02) NewImm |= 0x04; 11171 if (Imm & 0x04) NewImm |= 0x02; 11172 if (Imm & 0x20) NewImm |= 0x40; 11173 if (Imm & 0x40) NewImm |= 0x20; 11174 return getI8Imm(NewImm, SDLoc(N)); 11175}]>; 11176def VPTERNLOG231_imm8 : SDNodeXForm<imm, [{ 11177 // Convert a VPTERNLOG immediate by moving operand 1 to the end. 11178 uint8_t Imm = N->getZExtValue(); 11179 // Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5 11180 uint8_t NewImm = Imm & 0x81; 11181 if (Imm & 0x02) NewImm |= 0x04; 11182 if (Imm & 0x04) NewImm |= 0x10; 11183 if (Imm & 0x08) NewImm |= 0x40; 11184 if (Imm & 0x10) NewImm |= 0x02; 11185 if (Imm & 0x20) NewImm |= 0x08; 11186 if (Imm & 0x40) NewImm |= 0x20; 11187 return getI8Imm(NewImm, SDLoc(N)); 11188}]>; 11189def VPTERNLOG312_imm8 : SDNodeXForm<imm, [{ 11190 // Convert a VPTERNLOG immediate by moving operand 2 to the beginning. 
uint8_t Imm = N->getZExtValue();
  // Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3
  uint8_t NewImm = Imm & 0x81;
  if (Imm & 0x02) NewImm |= 0x10;
  if (Imm & 0x04) NewImm |= 0x02;
  if (Imm & 0x08) NewImm |= 0x20;
  if (Imm & 0x10) NewImm |= 0x04;
  if (Imm & 0x20) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;

// Ternary-logic instruction forms for one vector type `_`:
//   rri  - all three sources in registers,
//   rmi  - third source is a full-vector load,
//   rmbi - third source is an embedded broadcast of a scalar load.
// $src1 is tied to $dst. `Name` is the record-name prefix used to look up the
// masked/zero-masked variants (rrik, rmik, rmikz, rmbik, rmbikz) from the
// extra patterns below. Those patterns match the same node with its operands
// in a different order and compensate by rewriting the immediate through the
// matching VPTERNLOGxyz_imm8 transform.
multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          X86FoldableSchedWrite sched, X86VectorVTInfo _,
                          string Name>{
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
                      OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_.VT _.RC:$src3),
                              (i8 imm:$src4)), 1, 1>,
                      AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
  defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
                    OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT _.RC:$src2),
                            (_.VT (bitconvert (_.LdFrag addr:$src3))),
                            (i8 imm:$src4)), 1, 0>,
                    AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
                    OpcodeStr, "$src4, ${src3}"##_.BroadcastStr##", $src2",
                    "$src2, ${src3}"##_.BroadcastStr##", $src4",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT _.RC:$src2),
                            (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
                            (i8 imm:$src4)), 1, 0>, EVEX_B,
                    AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  }// Constraints = "$src1 = $dst"

  // Additional patterns for matching passthru operand in other positions.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 imm:$src4))>;

  // Additional patterns for matching loads in other positions.
  def : Pat<(_.VT (OpNode (bitconvert (_.LdFrag addr:$src3)),
                          _.RC:$src2, _.RC:$src1, (i8 imm:$src4))),
            (!cast<Instruction>(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
                                   addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
  def : Pat<(_.VT (OpNode _.RC:$src1,
                          (bitconvert (_.LdFrag addr:$src3)),
                          _.RC:$src2, (i8 imm:$src4))),
            (!cast<Instruction>(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
                                   addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;

  // Additional patterns for matching zero masking with loads in other
  // positions.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, (i8 imm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;

  // Additional patterns for matching masked loads with different
  // operand orders.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1,
                    (bitconvert (_.LdFrag addr:$src3)), (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src1, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src1, _.RC:$src2, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>;

  // Additional patterns for matching broadcasts in other positions.
  def : Pat<(_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                          _.RC:$src2, _.RC:$src1, (i8 imm:$src4))),
            (!cast<Instruction>(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
                                   addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
  def : Pat<(_.VT (OpNode _.RC:$src1,
                          (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                          _.RC:$src2, (i8 imm:$src4))),
            (!cast<Instruction>(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
                                   addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;

  // Additional patterns for matching zero masking with broadcasts in other
  // positions.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                    _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
             _.KRCWM:$mask, _.RC:$src2, addr:$src3,
             (VPTERNLOG321_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src1,
                    (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                    _.RC:$src2, (i8 imm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
             _.KRCWM:$mask, _.RC:$src2, addr:$src3,
             (VPTERNLOG132_imm8 imm:$src4))>;

  // Additional patterns for matching masked broadcasts with different
  // operand orders.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src1,
                    (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                    _.RC:$src2, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                    _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1,
                    (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                    (i8 imm:$src4)), _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src2,
                    (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                    _.RC:$src1, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                    _.RC:$src1, _.RC:$src2, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>;
}

// Instantiates avx512_ternlog for the 512-bit type under HasAVX512 and for
// the 128/256-bit types under HasAVX512+HasVLX. NAME is forwarded so the
// extra patterns can find the generated masked instruction records.
multiclass avx512_common_ternlog<string OpcodeStr, X86SchedWriteWidths sched,
                                 AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in
    defm Z    : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.ZMM,
                               _.info512, NAME>, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.XMM,
                               _.info128, NAME>, EVEX_V128;
    defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.YMM,
                               _.info256, NAME>, EVEX_V256;
  }
}

defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SchedWriteVecALU,
                                        avx512vl_i32_info>;
defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU,
                                        avx512vl_i64_info>, VEX_W;

// Patterns to implement vnot using vpternlog instead of creating all ones
// using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen
// so that the result is only dependent on src0. But we use the same source
// for all operands to prevent a false dependency.
// TODO: We should maybe have a more generalized algorithm for folding to
// vpternlog.
let Predicates = [HasAVX512] in {
  def : Pat<(xor VR512:$src, (v64i8 immAllOnesV)),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
  def : Pat<(xor VR512:$src, (v32i16 immAllOnesV)),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
  def : Pat<(xor VR512:$src, (v16i32 immAllOnesV)),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
  def : Pat<(xor VR512:$src, (v8i64 immAllOnesV)),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
}

// Without VLX only the 512-bit instruction is available: insert the narrow
// source into an undefined 512-bit register, run the 512-bit vpternlog and
// extract the low subregister again.
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(xor VR128X:$src, (v16i8 immAllOnesV)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (i8 15)), sub_xmm)>;
  def : Pat<(xor VR128X:$src, (v8i16 immAllOnesV)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (i8 15)), sub_xmm)>;
  def : Pat<(xor VR128X:$src, (v4i32 immAllOnesV)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (i8 15)), sub_xmm)>;
  def : Pat<(xor VR128X:$src, (v2i64 immAllOnesV)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (i8 15)), sub_xmm)>;

  def : Pat<(xor VR256X:$src, (v32i8 immAllOnesV)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (i8 15)), sub_ymm)>;
  def : Pat<(xor VR256X:$src, (v16i16 immAllOnesV)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (i8 15)), sub_ymm)>;
  def : Pat<(xor VR256X:$src, (v8i32 immAllOnesV)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (i8 15)), sub_ymm)>;
  def : Pat<(xor VR256X:$src, (v4i64 immAllOnesV)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (i8 15)), sub_ymm)>;
}

let Predicates = [HasVLX] in {
  def : Pat<(xor VR128X:$src, (v16i8 immAllOnesV)),
            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
  def : Pat<(xor VR128X:$src, (v8i16 immAllOnesV)),
            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
  def : Pat<(xor VR128X:$src, (v4i32 immAllOnesV)),
            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
  def : Pat<(xor VR128X:$src, (v2i64 immAllOnesV)),
            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;

  def : Pat<(xor VR256X:$src, (v32i8 immAllOnesV)),
            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
  def : Pat<(xor VR256X:$src, (v16i16 immAllOnesV)),
            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
  def : Pat<(xor VR256X:$src, (v8i32 immAllOnesV)),
            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
  def : Pat<(xor VR256X:$src, (v4i64 immAllOnesV)),
            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - FixupImm
//===----------------------------------------------------------------------===//

// Packed fixupimm forms for vector type `_`: register (rri), full-vector load
// (rmi) and embedded broadcast (rmbi). The third source is typed with TblVT,
// the second info argument supplied by the instantiation.
// $src1 is tied to $dst.
multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                  X86VectorVTInfo TblVT>{
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
    defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                         OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                        (X86VFixupimm (_.VT _.RC:$src1),
                                      (_.VT _.RC:$src2),
                                      (TblVT.VT _.RC:$src3),
                                      (i32 imm:$src4))>, Sched<[sched]>;
    defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
                      OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (X86VFixupimm (_.VT _.RC:$src1),
                                    (_.VT _.RC:$src2),
                                    (TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))),
                                    (i32 imm:$src4))>,
                      Sched<[sched.Folded, sched.ReadAfterFold]>;
    defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
                    OpcodeStr##_.Suffix, "$src4, ${src3}"##_.BroadcastStr##", $src2",
                    "$src2, ${src3}"##_.BroadcastStr##", $src4",
                      (X86VFixupimm (_.VT _.RC:$src1),
                                    (_.VT _.RC:$src2),
                                    (TblVT.VT (X86VBroadcast(TblVT.ScalarLdFrag addr:$src3))),
                                    (i32 imm:$src4))>,
                    EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  } // Constraints = "$src1 = $dst"
}

// avx512_fixupimm_packed plus the register-only {sae} form (rrib).
multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
                                      X86FoldableSchedWrite sched,
                                      X86VectorVTInfo _, X86VectorVTInfo TblVT>
  : avx512_fixupimm_packed<opc, OpcodeStr, sched, _, TblVT> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
  defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                      OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
                      "$src2, $src3, {sae}, $src4",
                      (X86VFixupimmSAE (_.VT _.RC:$src1),
                                       (_.VT _.RC:$src2),
                                       (TblVT.VT _.RC:$src3),
                                       (i32 imm:$src4))>,
                      EVEX_B, Sched<[sched]>;
  }
}

// Scalar fixupimm forms: register (rri), register with {sae} (rrib) and
// scalar load (rmi). The third source is typed with _src3VT.
// $src1 is tied to $dst.
multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                  X86VectorVTInfo _src3VT> {
  let Constraints = "$src1 = $dst" , Predicates = [HasAVX512],
      ExeDomain = _.ExeDomain in {
    defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                      OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (X86VFixupimms (_.VT _.RC:$src1),
                                     (_.VT _.RC:$src2),
                                     (_src3VT.VT _src3VT.RC:$src3),
                                     (i32 imm:$src4))>, Sched<[sched]>;
    // rrib is a register-only form (MRMSrcReg, no memory operand), so it uses
    // the plain scheduling class like rri above and like rrib in
    // avx512_fixupimm_packed_sae, not the folded-load class.
    defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                      OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
                      "$src2, $src3, {sae}, $src4",
                      (X86VFixupimmSAEs (_.VT _.RC:$src1),
                                        (_.VT _.RC:$src2),
                                        (_src3VT.VT _src3VT.RC:$src3),
                                        (i32 imm:$src4))>,
                      EVEX_B, Sched<[sched]>;
    defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                     (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
                     OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                     (X86VFixupimms (_.VT _.RC:$src1),
                                    (_.VT _.RC:$src2),
                                    (_src3VT.VT (scalar_to_vector
                                              (_src3VT.ScalarLdFrag addr:$src3))),
                                    (i32 imm:$src4))>,
                     Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Instantiates the packed fixupimm forms: 512-bit (with {sae}) under
// HasAVX512, 128/256-bit (no {sae}) under HasAVX512+HasVLX.
multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched,
                                      AVX512VLVectorVTInfo _Vec,
                                      AVX512VLVectorVTInfo _Tbl> {
  let Predicates = [HasAVX512] in
    defm Z    : avx512_fixupimm_packed_sae<0x54, "vfixupimm", sched.ZMM,
                                _Vec.info512, _Tbl.info512>, AVX512AIi8Base,
                                EVEX_4V, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.XMM,
                            _Vec.info128, _Tbl.info128>, AVX512AIi8Base,
                            EVEX_4V, EVEX_V128;
    defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.YMM,
                            _Vec.info256, _Tbl.info256>, AVX512AIi8Base,
                            EVEX_4V, EVEX_V256;
  }
}

defm VFIXUPIMMSSZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
                                           SchedWriteFAdd.Scl, f32x_info, v4i32x_info>,
                          AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
defm VFIXUPIMMSDZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
                                           SchedWriteFAdd.Scl, f64x_info, v2i64x_info>,
                          AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f32_info,
                         avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f64_info,
                         avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;

// Patterns used to select SSE scalar fp arithmetic instructions from
// either:
//
// (1) a scalar fp operation followed by a blend
//
// The effect is that the backend no longer emits unnecessary vector
// insert instructions immediately after SSE scalar fp instructions
// like addss or mulss.
//
// For example, given the following code:
//   __m128 foo(__m128 A, __m128 B) {
//     A[0] += B[0];
//     return A;
//   }
//
// Previously we generated:
//   addss %xmm0, %xmm1
//   movss %xmm1, %xmm0
//
// We now generate:
//   addss %xmm1, %xmm0
//
// (2) a vector packed single/double fp operation followed by a vector insert
//
// The effect is that the backend converts the packed fp instruction
// followed by a vector insert into a single SSE scalar fp instruction.
//
// For example, given the following code:
//   __m128 foo(__m128 A, __m128 B) {
//     __m128 C = A + B;
//     return (__m128) {C[0], A[1], A[2], A[3]};
//   }
//
// Previously we generated:
//   addps %xmm0, %xmm1
//   movss %xmm1, %xmm0
//
// We now generate:
//   addss %xmm1, %xmm0

// TODO: Some canonicalization in lowering would simplify the number of
// patterns we have to try to match.
// Selection patterns that fold "extract element 0, scalar Op, re-insert with
// MoveNode" into the "V"#OpcPrefix#"Z*_Int" scalar instructions. Three groups
// are provided, each with a register and a load form for the second operand:
//   - unmasked                      (Zrr_Int   / Zrm_Int)
//   - merge-masked with $src0       (Zrr_Intk  / Zrm_Intk)
//   - zero-masked (ZeroFP fallback) (Zrr_Intkz / Zrm_Intkz)
multiclass AVX512_scalar_math_fp_patterns<SDNode Op, string OpcPrefix, SDNode MoveNode,
                                          X86VectorVTInfo _, PatLeaf ZeroFP> {
  let Predicates = [HasAVX512] in {
    // extracted scalar math op with insert via movss
    def : Pat<(MoveNode
               (_.VT VR128X:$dst),
               (_.VT (scalar_to_vector
                      (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
                          _.FRC:$src)))),
              (!cast<Instruction>("V"#OpcPrefix#Zrr_Int) _.VT:$dst,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src, VR128X)))>;
    def : Pat<(MoveNode
               (_.VT VR128X:$dst),
               (_.VT (scalar_to_vector
                      (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
                          (_.ScalarLdFrag addr:$src))))),
              (!cast<Instruction>("V"#OpcPrefix#Zrm_Int) _.VT:$dst, addr:$src)>;

    // extracted masked scalar math op with insert via movss
    def : Pat<(MoveNode (_.VT VR128X:$src1),
               (scalar_to_vector
                (X86selects VK1WM:$mask,
                            (Op (_.EltVT
                                 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                _.FRC:$src2),
                            _.FRC:$src0))),
              (!cast<Instruction>("V"#OpcPrefix#Zrr_Intk)
               (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
               VK1WM:$mask, _.VT:$src1,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
    def : Pat<(MoveNode (_.VT VR128X:$src1),
               (scalar_to_vector
                (X86selects VK1WM:$mask,
                            (Op (_.EltVT
                                 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                (_.ScalarLdFrag addr:$src2)),
                            _.FRC:$src0))),
              (!cast<Instruction>("V"#OpcPrefix#Zrm_Intk)
               (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
               VK1WM:$mask, _.VT:$src1, addr:$src2)>;

    // extracted zero-masked scalar math op with insert via movss
    // (the select's false operand is the ZeroFP constant, so the kz
    // instruction variants are used). The records are looked up with
    // !cast<Instruction>, matching every other pattern in this multiclass.
    def : Pat<(MoveNode (_.VT VR128X:$src1),
               (scalar_to_vector
                (X86selects VK1WM:$mask,
                            (Op (_.EltVT
                                 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                _.FRC:$src2), (_.EltVT ZeroFP)))),
              (!cast<Instruction>("V"#OpcPrefix#Zrr_Intkz)
               VK1WM:$mask, _.VT:$src1,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
    def : Pat<(MoveNode (_.VT VR128X:$src1),
               (scalar_to_vector
                (X86selects VK1WM:$mask,
                            (Op (_.EltVT
                                 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                (_.ScalarLdFrag addr:$src2)), (_.EltVT ZeroFP)))),
              (!cast<Instruction>("V"#OpcPrefix#Zrm_Intkz) VK1WM:$mask, _.VT:$src1, addr:$src2)>;
  }
}

defm : AVX512_scalar_math_fp_patterns<fadd, "ADDSS", X86Movss, v4f32x_info, fp32imm0>;
defm : AVX512_scalar_math_fp_patterns<fsub, "SUBSS", X86Movss, v4f32x_info, fp32imm0>;
defm : AVX512_scalar_math_fp_patterns<fmul, "MULSS", X86Movss, v4f32x_info, fp32imm0>;
defm : AVX512_scalar_math_fp_patterns<fdiv, "DIVSS", X86Movss, v4f32x_info, fp32imm0>;

defm : AVX512_scalar_math_fp_patterns<fadd, "ADDSD", X86Movsd, v2f64x_info, fp64imm0>;
defm : AVX512_scalar_math_fp_patterns<fsub, "SUBSD", X86Movsd, v2f64x_info, fp64imm0>;
defm : AVX512_scalar_math_fp_patterns<fmul, "MULSD", X86Movsd, v2f64x_info, fp64imm0>;
defm : AVX512_scalar_math_fp_patterns<fdiv, "DIVSD", X86Movsd, v2f64x_info, fp64imm0>;

// Folds "OpNode on element 0 of $src, inserted into $dst with Move" into the
// "V"#OpcPrefix#"Zr_Int" scalar instruction.
multiclass AVX512_scalar_unary_math_patterns<SDNode OpNode, string OpcPrefix,
                                             SDNode Move, X86VectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    def : Pat<(_.VT (Move _.VT:$dst,
                     (scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))),
              (!cast<Instruction>("V"#OpcPrefix#Zr_Int) _.VT:$dst, _.VT:$src)>;
  }
}

defm : AVX512_scalar_unary_math_patterns<fsqrt, "SQRTSS", X86Movss, v4f32x_info>;
defm : AVX512_scalar_unary_math_patterns<fsqrt, "SQRTSD", X86Movsd, v2f64x_info>;

//===----------------------------------------------------------------------===//
// AES instructions
//===----------------------------------------------------------------------===//

// EVEX-encoded AES round instructions. The 128/256-bit forms require
// HasVLX+HasVAES, the 512-bit form HasAVX512+HasVAES. IntPrefix is the
// intrinsic record name for the 128-bit form; "_256"/"_512" are appended for
// the wider forms.
multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> {
  let Predicates = [HasVLX, HasVAES] in {
    defm Z128 : AESI_binop_rm_int<Op, OpStr,
                                  !cast<Intrinsic>(IntPrefix),
                                  loadv2i64, 0, VR128X, i128mem>,
                  EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V128, VEX_WIG;
    defm Z256 : AESI_binop_rm_int<Op, OpStr,
                                  !cast<Intrinsic>(IntPrefix##"_256"),
                                  loadv4i64, 0, VR256X, i256mem>,
                  EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512, HasVAES] in
  defm Z    : AESI_binop_rm_int<Op, OpStr,
                                !cast<Intrinsic>(IntPrefix##"_512"),
                                loadv8i64, 0, VR512, i512mem>,
                EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_WIG;
}

defm VAESENC      : avx512_vaes<0xDC, "vaesenc", "int_x86_aesni_aesenc">;
defm VAESENCLAST  : avx512_vaes<0xDD, "vaesenclast", "int_x86_aesni_aesenclast">;
defm VAESDEC      : avx512_vaes<0xDE, "vaesdec", "int_x86_aesni_aesdec">;
defm VAESDECLAST  : avx512_vaes<0xDF, "vaesdeclast", "int_x86_aesni_aesdeclast">;

//===----------------------------------------------------------------------===//
// PCLMUL instructions - Carry less multiplication
//===----------------------------------------------------------------------===//

let Predicates = [HasAVX512, HasVPCLMULQDQ] in
defm VPCLMULQDQZ : vpclmulqdq<VR512, i512mem, loadv8i64, int_x86_pclmulqdq_512>,
                              EVEX_4V, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_WIG;

let Predicates = [HasVLX, HasVPCLMULQDQ] in {
defm VPCLMULQDQZ128 : vpclmulqdq<VR128X, i128mem, loadv2i64, int_x86_pclmulqdq>,
                              EVEX_4V, EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_WIG;

defm VPCLMULQDQZ256: vpclmulqdq<VR256X, i256mem, loadv4i64,
                                int_x86_pclmulqdq_256>, EVEX_4V, EVEX_V256,
                                EVEX_CD8<64, CD8VF>, VEX_WIG;
}

// Aliases
defm : vpclmulqdq_aliases<"VPCLMULQDQZ", VR512, i512mem>;
defm : vpclmulqdq_aliases<"VPCLMULQDQZ128", VR128X, i128mem>;
defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>;

11774//===----------------------------------------------------------------------===// 11775// VBMI2 11776//===----------------------------------------------------------------------===// 11777 11778multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode, 11779 X86FoldableSchedWrite sched, X86VectorVTInfo VTI> { 11780 let Constraints = "$src1 = $dst", 11781 ExeDomain = VTI.ExeDomain in { 11782 defm r: AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst), 11783 (ins VTI.RC:$src2, VTI.RC:$src3), OpStr, 11784 "$src3, $src2", "$src2, $src3", 11785 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>, 11786 AVX512FMA3Base, Sched<[sched]>; 11787 defm m: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst), 11788 (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr, 11789 "$src3, $src2", "$src2, $src3", 11790 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, 11791 (VTI.VT (VTI.LdFrag addr:$src3))))>, 11792 AVX512FMA3Base, 11793 Sched<[sched.Folded, sched.ReadAfterFold]>; 11794 } 11795} 11796 11797multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode, 11798 X86FoldableSchedWrite sched, X86VectorVTInfo VTI> 11799 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched, VTI> { 11800 let Constraints = "$src1 = $dst", 11801 ExeDomain = VTI.ExeDomain in 11802 defm mb: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst), 11803 (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr, 11804 "${src3}"##VTI.BroadcastStr##", $src2", 11805 "$src2, ${src3}"##VTI.BroadcastStr, 11806 (OpNode VTI.RC:$src1, VTI.RC:$src2, 11807 (VTI.VT (X86VBroadcast (VTI.ScalarLdFrag addr:$src3))))>, 11808 AVX512FMA3Base, EVEX_B, 11809 Sched<[sched.Folded, sched.ReadAfterFold]>; 11810} 11811 11812multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode, 11813 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> { 11814 let Predicates = [HasVBMI2] in 11815 defm Z : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.ZMM, VTI.info512>, 11816 EVEX_V512; 11817 let 
Predicates = [HasVBMI2, HasVLX] in { 11818 defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.YMM, VTI.info256>, 11819 EVEX_V256; 11820 defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.XMM, VTI.info128>, 11821 EVEX_V128; 11822 } 11823} 11824 11825multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode, 11826 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> { 11827 let Predicates = [HasVBMI2] in 11828 defm Z : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.ZMM, VTI.info512>, 11829 EVEX_V512; 11830 let Predicates = [HasVBMI2, HasVLX] in { 11831 defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.YMM, VTI.info256>, 11832 EVEX_V256; 11833 defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.XMM, VTI.info128>, 11834 EVEX_V128; 11835 } 11836} 11837multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix, 11838 SDNode OpNode, X86SchedWriteWidths sched> { 11839 defm W : VBMI2_shift_var_rm_common<wOp, Prefix##"w", OpNode, sched, 11840 avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>; 11841 defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix##"d", OpNode, sched, 11842 avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; 11843 defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix##"q", OpNode, sched, 11844 avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>; 11845} 11846 11847multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix, 11848 SDNode OpNode, X86SchedWriteWidths sched> { 11849 defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix##"w", sched, 11850 avx512vl_i16_info, avx512vl_i16_info, HasVBMI2>, 11851 VEX_W, EVEX_CD8<16, CD8VF>; 11852 defm D : avx512_common_3Op_imm8<Prefix##"d", avx512vl_i32_info, dqOp, 11853 OpNode, sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; 11854 defm Q : avx512_common_3Op_imm8<Prefix##"q", avx512vl_i64_info, dqOp, OpNode, 11855 sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; 11856} 11857 11858// Concat & Shift 11859defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, 
"vpshldv", X86VShldv, SchedWriteVecIMul>; 11860defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SchedWriteVecIMul>; 11861defm VPSHLD : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SchedWriteVecIMul>; 11862defm VPSHRD : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SchedWriteVecIMul>; 11863 11864// Compress 11865defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", WriteVarShuffle256, 11866 avx512vl_i8_info, HasVBMI2>, EVEX, 11867 NotMemoryFoldable; 11868defm VPCOMPRESSW : compress_by_elt_width <0x63, "vpcompressw", WriteVarShuffle256, 11869 avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W, 11870 NotMemoryFoldable; 11871// Expand 11872defm VPEXPANDB : expand_by_elt_width <0x62, "vpexpandb", WriteVarShuffle256, 11873 avx512vl_i8_info, HasVBMI2>, EVEX; 11874defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", WriteVarShuffle256, 11875 avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W; 11876 11877//===----------------------------------------------------------------------===// 11878// VNNI 11879//===----------------------------------------------------------------------===// 11880 11881let Constraints = "$src1 = $dst" in 11882multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode, 11883 X86FoldableSchedWrite sched, X86VectorVTInfo VTI> { 11884 defm r : AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst), 11885 (ins VTI.RC:$src2, VTI.RC:$src3), OpStr, 11886 "$src3, $src2", "$src2, $src3", 11887 (VTI.VT (OpNode VTI.RC:$src1, 11888 VTI.RC:$src2, VTI.RC:$src3))>, 11889 EVEX_4V, T8PD, Sched<[sched]>; 11890 defm m : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst), 11891 (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr, 11892 "$src3, $src2", "$src2, $src3", 11893 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, 11894 (VTI.VT (VTI.LdFrag addr:$src3))))>, 11895 EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD, 11896 Sched<[sched.Folded, sched.ReadAfterFold]>; 11897 defm mb : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst), 11898 (ins VTI.RC:$src2, 
VTI.ScalarMemOp:$src3), 11899 OpStr, "${src3}"##VTI.BroadcastStr##", $src2", 11900 "$src2, ${src3}"##VTI.BroadcastStr, 11901 (OpNode VTI.RC:$src1, VTI.RC:$src2, 11902 (VTI.VT (X86VBroadcast 11903 (VTI.ScalarLdFrag addr:$src3))))>, 11904 EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B, 11905 T8PD, Sched<[sched.Folded, sched.ReadAfterFold]>; 11906} 11907 11908multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode, 11909 X86SchedWriteWidths sched> { 11910 let Predicates = [HasVNNI] in 11911 defm Z : VNNI_rmb<Op, OpStr, OpNode, sched.ZMM, v16i32_info>, EVEX_V512; 11912 let Predicates = [HasVNNI, HasVLX] in { 11913 defm Z256 : VNNI_rmb<Op, OpStr, OpNode, sched.YMM, v8i32x_info>, EVEX_V256; 11914 defm Z128 : VNNI_rmb<Op, OpStr, OpNode, sched.XMM, v4i32x_info>, EVEX_V128; 11915 } 11916} 11917 11918// FIXME: Is there a better scheduler class for VPDP? 11919defm VPDPBUSD : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SchedWriteVecIMul>; 11920defm VPDPBUSDS : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SchedWriteVecIMul>; 11921defm VPDPWSSD : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SchedWriteVecIMul>; 11922defm VPDPWSSDS : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SchedWriteVecIMul>; 11923 11924//===----------------------------------------------------------------------===// 11925// Bit Algorithms 11926//===----------------------------------------------------------------------===// 11927 11928// FIXME: Is there a better scheduler class for VPOPCNTB/VPOPCNTW? 
// Byte and word population count, selected from ctpop; both need HasBITALG.
defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, SchedWriteVecALU,
                                   avx512vl_i8_info, HasBITALG>;
defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SchedWriteVecALU,
                                   avx512vl_i16_info, HasBITALG>, VEX_W;

defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>;
defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>;

// X86Vpshufbitqmb restricted to nodes with exactly one use.
def X86Vpshufbitqmb_su : PatFrag<(ops node:$src1, node:$src2),
                                 (X86Vpshufbitqmb node:$src1, node:$src2), [{
  return N->hasOneUse();
}]>;

// vpshufbitqmb producing a mask register (VTI.KRC): register-register (rr)
// and register-memory (rm) forms. Each form is given two fragments, the plain
// node and its single-use variant.
multiclass VPSHUFBITQMB_rm<X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
  defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI,
                                (outs VTI.KRC:$dst),
                                (ins VTI.RC:$src1, VTI.RC:$src2),
                                "vpshufbitqmb",
                                "$src2, $src1", "$src1, $src2",
                                (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
                                                 (VTI.VT VTI.RC:$src2)),
                                (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
                                                    (VTI.VT VTI.RC:$src2))>,
            EVEX_4V, T8PD,
            Sched<[sched]>;
  defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI,
                                (outs VTI.KRC:$dst),
                                (ins VTI.RC:$src1, VTI.MemOp:$src2),
                                "vpshufbitqmb",
                                "$src2, $src1", "$src1, $src2",
                                (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
                                                 (VTI.VT (VTI.LdFrag addr:$src2))),
                                (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
                                                    (VTI.VT (VTI.LdFrag addr:$src2)))>,
            EVEX_4V, EVEX_CD8<8, CD8VF>, T8PD,
            Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// 512-bit form under HasBITALG; 128/256-bit forms additionally need HasVLX.
multiclass VPSHUFBITQMB_common<X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasBITALG] in {
    defm Z    : VPSHUFBITQMB_rm<sched.ZMM, VTI.info512>, EVEX_V512;
  }
  let Predicates = [HasBITALG, HasVLX] in {
    defm Z256 : VPSHUFBITQMB_rm<sched.YMM, VTI.info256>, EVEX_V256;
    defm Z128 : VPSHUFBITQMB_rm<sched.XMM, VTI.info128>, EVEX_V128;
  }
}

// FIXME: Is there a better scheduler class for VPSHUFBITQMB?
defm VPSHUFBITQMB : VPSHUFBITQMB_common<SchedWriteVecIMul, avx512vl_i8_info>;

//===----------------------------------------------------------------------===//
// GFNI
//===----------------------------------------------------------------------===//

// Byte-vector binary op built on avx512_binop_rm for the three widths.
// The 512-bit form requires HasGFNI, HasAVX512 and HasBWI; the 256/128-bit
// forms require HasGFNI, HasVLX and HasBWI.
// NOTE(review): the trailing `1` is presumably avx512_binop_rm's
// IsCommutable flag - confirm against that multiclass.
multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
                                   X86SchedWriteWidths sched> {
  let Predicates = [HasGFNI, HasAVX512, HasBWI] in
  defm Z      : avx512_binop_rm<Op, OpStr, OpNode, v64i8_info, sched.ZMM, 1>,
                                EVEX_V512;
  let Predicates = [HasGFNI, HasVLX, HasBWI] in {
    defm Z256 : avx512_binop_rm<Op, OpStr, OpNode, v32i8x_info, sched.YMM, 1>,
                                EVEX_V256;
    defm Z128 : avx512_binop_rm<Op, OpStr, OpNode, v16i8x_info, sched.XMM, 1>,
                                EVEX_V128;
  }
}

defm VGF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb,
                                          SchedWriteVecALU>,
                                          EVEX_CD8<8, CD8VF>, T8PD;

// Adds a broadcast-memory form (rmbi) on top of the forms inherited from
// avx512_3Op_rm_imm8. The instruction operates on VTI (a byte vector in every
// instantiation below), while the broadcast operand is described by BcstVTI:
// its BroadcastStr is used in the assembly string and the broadcast value is
// built as BcstVTI.VT and then bitconverted for OpNode.
// NOTE(review): the broadcast load is hard-coded to loadi64; every
// instantiation in this file passes a 64-bit-element BcstVTI.
multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
                                      X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
                                      X86VectorVTInfo BcstVTI>
           : avx512_3Op_rm_imm8<Op, OpStr, OpNode, sched, VTI, VTI> {
  let ExeDomain = VTI.ExeDomain in
  defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                (ins VTI.RC:$src1, VTI.ScalarMemOp:$src2, u8imm:$src3),
                OpStr, "$src3, ${src2}"##BcstVTI.BroadcastStr##", $src1",
                "$src1, ${src2}"##BcstVTI.BroadcastStr##", $src3",
                (OpNode (VTI.VT VTI.RC:$src1),
                 (bitconvert (BcstVTI.VT (X86VBroadcast (loadi64 addr:$src2)))),
                 (i8 imm:$src3))>, EVEX_B,
                 Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// Instantiates GF2P8AFFINE_avx512_rmb_imm per width, pairing each byte-vector
// info with the i64-vector info of the same total size for the broadcast form.
multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
                                     X86SchedWriteWidths sched> {
  let Predicates = [HasGFNI, HasAVX512, HasBWI] in
  defm Z      : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.ZMM,
                                           v64i8_info, v8i64_info>, EVEX_V512;
  let Predicates = [HasGFNI, HasVLX, HasBWI] in {
    defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.YMM,
                                           v32i8x_info, v4i64x_info>, EVEX_V256;
    defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.XMM,
                                           v16i8x_info, v2i64x_info>, EVEX_V128;
  }
}

defm VGF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb",
                         X86GF2P8affineinvqb, SchedWriteVecIMul>,
                         EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
defm VGF2P8AFFINEQB    : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb",
                         X86GF2P8affineqb, SchedWriteVecIMul>,
                         EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;


//===----------------------------------------------------------------------===//
// AVX5124FMAPS
//===----------------------------------------------------------------------===//

// Memory-source-only definitions with an empty pattern list ([]).
// hasSideEffects, mayLoad, ExeDomain and the "$src1 = $dst" tie are supplied
// by the enclosing `let`. The packed forms are 512-bit only (EVEX_V512, CD8VQ);
// the scalar forms use VEX_LIG with CD8VF. All four take an f128mem operand.
let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedSingle,
    Constraints = "$src1 = $dst" in {
defm V4FMADDPSrm : AVX512_maskable_3src_in_asm<0x9A, MRMSrcMem, v16f32_info,
                    (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                    "v4fmaddps", "$src3, $src2", "$src2, $src3",
                    []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                    Sched<[SchedWriteFMA.ZMM.Folded]>;

defm V4FNMADDPSrm : AVX512_maskable_3src_in_asm<0xAA, MRMSrcMem, v16f32_info,
                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                     "v4fnmaddps", "$src3, $src2", "$src2, $src3",
                     []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                     Sched<[SchedWriteFMA.ZMM.Folded]>;

defm V4FMADDSSrm : AVX512_maskable_3src_in_asm<0x9B, MRMSrcMem, f32x_info,
                    (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
                    "v4fmaddss", "$src3, $src2", "$src2, $src3",
                    []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
                    Sched<[SchedWriteFMA.Scl.Folded]>;

defm V4FNMADDSSrm : AVX512_maskable_3src_in_asm<0xAB, MRMSrcMem, f32x_info,
                     (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
                     "v4fnmaddss", "$src3, $src2", "$src2, $src3",
                     []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
                     Sched<[SchedWriteFMA.Scl.Folded]>;
}

//===----------------------------------------------------------------------===//
// AVX5124VNNIW
//===----------------------------------------------------------------------===//

// Same shape as the AVX5124FMAPS definitions above, but on v16i32_info in the
// SSEPackedInt domain: 512-bit only, f128mem source, empty pattern list.
// Scheduling currently reuses SchedWriteFMA.ZMM.Folded.
let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedInt,
    Constraints = "$src1 = $dst" in {
defm VP4DPWSSDrm : AVX512_maskable_3src_in_asm<0x52, MRMSrcMem, v16i32_info,
                    (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                     "vp4dpwssd", "$src3, $src2", "$src2, $src3",
                    []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                    Sched<[SchedWriteFMA.ZMM.Folded]>;

defm VP4DPWSSDSrm : AVX512_maskable_3src_in_asm<0x53, MRMSrcMem, v16i32_info,
                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                     "vp4dpwssds", "$src3, $src2", "$src2, $src3",
                     []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                     Sched<[SchedWriteFMA.ZMM.Folded]>;
}

// Pattern-less pseudo instructions over a VK16PAIR operand and an anymem
// operand; the first is flagged mayStore, the second mayLoad.
let hasSideEffects = 0 in {
  let mayStore = 1 in
  def MASKPAIR16STORE : PseudoI<(outs), (ins anymem:$dst, VK16PAIR:$src), []>;
  let mayLoad = 1 in
  def MASKPAIR16LOAD : PseudoI<(outs VK16PAIR:$dst), (ins anymem:$src), []>;
}

//===----------------------------------------------------------------------===//
// VP2INTERSECT
//===----------------------------------------------------------------------===//

// The three operand forms of vp2intersect (opcode 0x68). The destination is
// the mask register pair operand _.KRPC.
//   rr  - both sources in registers.
//   rm  - second source is a full-vector load (_.LdFrag).
//   rmb - second source is a broadcast scalar load (EVEX_B, _.BroadcastStr in
//         the assembly string).
// The mnemonic suffix comes from _.Suffix.
multiclass avx512_vp2intersect_modes<X86VectorVTInfo _> {
  def rr : I<0x68, MRMSrcReg,
                  (outs _.KRPC:$dst),
                  (ins _.RC:$src1, _.RC:$src2),
                  !strconcat("vp2intersect", _.Suffix,
                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set _.KRPC:$dst, (X86vp2intersect
                            _.RC:$src1, (_.VT _.RC:$src2)))]>,
                  EVEX_4V, T8XD;

  def rm : I<0x68, MRMSrcMem,
                  (outs _.KRPC:$dst),
                  (ins _.RC:$src1, _.MemOp:$src2),
                  !strconcat("vp2intersect", _.Suffix,
                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set _.KRPC:$dst, (X86vp2intersect
                            _.RC:$src1, (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>,
                  EVEX_4V, T8XD, EVEX_CD8<_.EltSize, CD8VF>;

  def rmb : I<0x68, MRMSrcMem,
                  (outs _.KRPC:$dst),
                  (ins _.RC:$src1, _.ScalarMemOp:$src2),
                  !strconcat("vp2intersect", _.Suffix, "\t{${src2}", _.BroadcastStr,
                             ", $src1, $dst|$dst, $src1, ${src2}", _.BroadcastStr ,"}"),
                  [(set _.KRPC:$dst, (X86vp2intersect
                             _.RC:$src1, (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src2)))))]>,
                  EVEX_4V, T8XD, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>;
}

// Instantiates the three modes per vector width; the 256/128-bit variants
// additionally require HasVLX.
multiclass avx512_vp2intersect<AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512, HasVP2INTERSECT] in
    defm Z : avx512_vp2intersect_modes<_.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVP2INTERSECT, HasVLX] in {
    defm Z256 : avx512_vp2intersect_modes<_.info256>, EVEX_V256;
    defm Z128 : avx512_vp2intersect_modes<_.info128>, EVEX_V128;
  }
}

defm VP2INTERSECTD : avx512_vp2intersect<avx512vl_i32_info>;
defm VP2INTERSECTQ : avx512_vp2intersect<avx512vl_i64_info>, VEX_W;

// Binary op whose source and destination vector types differ. For each width
// avx512_binop_rm2 receives the source info, the destination info, and the
// source info a second time as its third VT-info argument.
// Note that EVEX_CD8<32, CD8VF> is fixed here regardless of the VT infos
// passed in.
multiclass avx512_binop_all2<bits<8> opc, string OpcodeStr,
                             X86SchedWriteWidths sched,
                             AVX512VLVectorVTInfo _SrcVTInfo,
                             AVX512VLVectorVTInfo _DstVTInfo,
                             SDNode OpNode, Predicate prd,
                             bit IsCommutable = 0> {
  let Predicates = [prd] in
    defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
                                   _SrcVTInfo.info512, _DstVTInfo.info512,
                                   _SrcVTInfo.info512, IsCommutable>,
                                   EVEX_V512, EVEX_CD8<32, CD8VF>;
  let Predicates = [HasVLX, prd] in {
    defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
                                      _SrcVTInfo.info256, _DstVTInfo.info256,
                                      _SrcVTInfo.info256, IsCommutable>,
                                     EVEX_V256, EVEX_CD8<32, CD8VF>;
    defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
                                      _SrcVTInfo.info128, _DstVTInfo.info128,
                                      _SrcVTInfo.info128, IsCommutable>,
                                      EVEX_V128, EVEX_CD8<32, CD8VF>;
  }
}

// Two f32 vector sources, i16 vector destination; not commutable.
defm VCVTNE2PS2BF16 : avx512_binop_all2<0x72, "vcvtne2ps2bf16",
                        SchedWriteCvtPD2PS, //FIXME: Should be SchedWriteCvtPS2BF
                        avx512vl_f32_info, avx512vl_i16_info,
                        X86cvtne2ps2bf16, HasBF16, 0>, T8XD;

// Truncate Float to BFloat16
// The 128-bit form is instantiated with null_frag (no instruction patterns)
// and explicit "{1to4}" / "{x}" / f128mem / VK4WM arguments; its selection
// patterns are written out by hand after this multiclass. The 256-bit form
// keeps X86cvtneps2bf16 and uses "{1to8}" / "{y}".
// The InstAliases accept the mnemonic with an explicit "x" or "y" suffix; the
// memory-operand aliases are restricted to the "intel" asm variant.
multiclass avx512_cvtps2bf16<bits<8> opc, string OpcodeStr,
                             X86SchedWriteWidths sched> {
  let Predicates = [HasBF16] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i16x_info, v16f32_info,
                            X86cvtneps2bf16, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasBF16, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v4f32x_info,
                               null_frag, sched.XMM, "{1to4}", "{x}", f128mem,
                               VK4WM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v8f32x_info,
                               X86cvtneps2bf16,
                               sched.YMM, "{1to8}", "{y}">, EVEX_V256;

    def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
                    VR128X:$src), 0>;
    def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst,
                    f128mem:$src), 0, "intel">;
    def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
                    VR256X:$src), 0>;
    def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst,
                    f256mem:$src), 0, "intel">;
  }
}

defm VCVTNEPS2BF16 : avx512_cvtps2bf16<0x72, "vcvtneps2bf16",
                                       SchedWriteCvtPD2PS>, T8XS,
                                       EVEX_CD8<32, CD8VF>;

let Predicates = [HasBF16, HasVLX] in {
  // Special patterns to allow use of X86mcvtneps2bf16 for masking. Instruction
  // patterns have been disabled with null_frag.
  // Register source: unmasked, merge-masked (rrk), zero-masked (rrkz).
  def : Pat<(v8i16 (X86cvtneps2bf16 (v4f32 VR128X:$src))),
            (VCVTNEPS2BF16Z128rr VR128X:$src)>;
  def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), (v8i16 VR128X:$src0),
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src)>;
  def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), v8i16x_info.ImmAllZerosV,
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rrkz VK4WM:$mask, VR128X:$src)>;

  // Full-vector load source: rm, rmk, rmkz.
  def : Pat<(v8i16 (X86cvtneps2bf16 (loadv4f32 addr:$src))),
            (VCVTNEPS2BF16Z128rm addr:$src)>;
  def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), (v8i16 VR128X:$src0),
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), v8i16x_info.ImmAllZerosV,
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rmkz VK4WM:$mask, addr:$src)>;

  // Broadcast scalar load source: rmb, rmbk, rmbkz.
  def : Pat<(v8i16 (X86cvtneps2bf16 (v4f32
                                     (X86VBroadcast (loadf32 addr:$src))))),
            (VCVTNEPS2BF16Z128rmb addr:$src)>;
  def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcast (loadf32 addr:$src))),
                              (v8i16 VR128X:$src0), VK4WM:$mask),
            (VCVTNEPS2BF16Z128rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcast (loadf32 addr:$src))),
                              v8i16x_info.ImmAllZerosV, VK4WM:$mask),
            (VCVTNEPS2BF16Z128rmbkz VK4WM:$mask, addr:$src)>;
}

// Three-source op whose first source is tied to the destination. `_`
// describes the destination/register operands, while `src_v` gives the type
// used for the loaded or broadcast third operand in the m and mb forms.
let Constraints = "$src1 = $dst" in {
multiclass avx512_dpbf16ps_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86VectorVTInfo _, X86VectorVTInfo src_v> {
  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src2, _.RC:$src3),
                           OpcodeStr, "$src3, $src2", "$src2, $src3",
                           (_.VT (OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3))>,
                           EVEX_4V;

  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                               (ins _.RC:$src2, _.MemOp:$src3),
                               OpcodeStr, "$src3, $src2", "$src2, $src3",
                               (_.VT (OpNode _.RC:$src1, _.RC:$src2,
                               (src_v.VT (bitconvert
                               (src_v.LdFrag addr:$src3)))))>, EVEX_4V;

  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src2, _.ScalarMemOp:$src3),
                  OpcodeStr,
                  !strconcat("${src3}", _.BroadcastStr,", $src2"),
                  !strconcat("$src2, ${src3}", _.BroadcastStr),
                  (_.VT (OpNode _.RC:$src1, _.RC:$src2,
                  (src_v.VT (X86VBroadcast(src_v.ScalarLdFrag addr:$src3)))))>,
                  EVEX_B, EVEX_4V;

}
} // Constraints = "$src1 = $dst"

// Instantiates avx512_dpbf16ps_rm per width under `prd`; the 256/128-bit
// variants additionally require HasVLX.
multiclass avx512_dpbf16ps_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 AVX512VLVectorVTInfo _,
                                 AVX512VLVectorVTInfo src_v, Predicate prd> {
  let Predicates = [prd] in {
    defm Z    : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, _.info512,
                                   src_v.info512>, EVEX_V512;
  }
  let Predicates = [HasVLX, prd] in {
    defm Z256 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, _.info256,
                                   src_v.info256>, EVEX_V256;
    defm Z128 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, _.info128,
                                   src_v.info128>, EVEX_V128;
  }
}

defm VDPBF16PS : avx512_dpbf16ps_sizes<0x52, "vdpbf16ps", X86dpbf16ps,
                                       avx512vl_f32_info, avx512vl_i32_info,
                                       HasBF16>, T8XS, EVEX_CD8<32, CD8VF>;