//===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 AVX512 instruction set, defining the
// instructions, and properties of the instructions which are needed for code
// generation, machine code emission, and analysis.
//
//===----------------------------------------------------------------------===//

// Group template arguments that can be derived from the vector type (EltNum x
// EltVT). These are things like the register class for the writemask, etc.
// The idea is to pass one of these as the template argument rather than the
// individual arguments.
// The template is also used for scalar types, in this case numelts is 1.
//
// Most derived fields are computed by string-pasting record names (e.g.
// "VK" # NumElts) and resolving them with !cast<>, so the referenced records
// (VK*, VK*WM, v*i1, *mem, load*, ...) must exist elsewhere in the target
// description for each instantiation actually used.
class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
                      string suffix = ""> {
  // The register class holding the full vector (or scalar) value.
  RegisterClass RC = rc;
  // Element type, e.g. i32 for v16i32.
  ValueType EltVT = eltvt;
  // Number of elements; 1 for scalar instantiations.
  int NumElts = numelts;

  // Corresponding mask register class.
  RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);

  // Corresponding mask register pair class.
  // Undefined (?) when NumElts > 16, since no such pair class exists.
  RegisterOperand KRPC = !if (!gt(NumElts, 16), ?,
                              !cast<RegisterOperand>("VK" # NumElts # "Pair"));

  // Corresponding write-mask register class.
  RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");

  // The mask VT.
  ValueType KVT = !cast<ValueType>("v" # NumElts # "i1");

  // Suffix used in the instruction mnemonic.
  string Suffix = suffix;

  // VTName is a string name for vector VT. For vector types it will be
  // v # NumElts # EltVT, so for vector of 8 elements of i32 it will be v8i32
  // It is a little bit complex for scalar types, where NumElts = 1.
  // In this case we build v4f32 or v2f64
  string VTName = "v" # !if (!eq (NumElts, 1),
                        !if (!eq (EltVT.Size, 32), 4,
                        !if (!eq (EltVT.Size, 64), 2, NumElts)), NumElts) # EltVT;

  // The vector VT.
  ValueType VT = !cast<ValueType>(VTName);

  // Textual name of the element type, e.g. "f32", used in string matching below.
  string EltTypeName = !cast<string>(EltVT);
  // Size of the element type in bits, e.g. 32 for v16i32.
  string EltSizeName = !subst("i", "", !subst("f", "", EltTypeName));
  int EltSize = EltVT.Size;

  // "i" for integer types and "f" for floating-point types
  string TypeVariantName = !subst(EltSizeName, "", EltTypeName);

  // Size of RC in bits, e.g. 512 for VR512.
  int Size = VT.Size;

  // The corresponding memory operand, e.g. i512mem for VR512.
  X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
  X86MemOperand ScalarMemOp = !cast<X86MemOperand>(EltVT # "mem");
  // FP scalar memory operand for intrinsics - ssmem/sdmem.
  // Undefined (?) for integer element types.
  Operand IntScalarMemOp = !if (!eq (EltTypeName, "f32"), !cast<Operand>("ssmem"),
                           !if (!eq (EltTypeName, "f64"), !cast<Operand>("sdmem"), ?));

  // Load patterns
  PatFrag LdFrag = !cast<PatFrag>("load" # VTName);

  PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" # VTName);

  PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);
  PatFrag BroadcastLdFrag = !cast<PatFrag>("X86VBroadcastld" # EltSizeName);

  // Scalar-in-vector load fragments for FP intrinsics; undefined for integers.
  PatFrags ScalarIntMemFrags = !if (!eq (EltTypeName, "f32"),
                                    !cast<PatFrags>("sse_load_f32"),
                               !if (!eq (EltTypeName, "f64"),
                                    !cast<PatFrags>("sse_load_f64"),
                                    ?));

  // The string to specify embedded broadcast in assembly.
  string BroadcastStr = "{1to" # NumElts # "}";

  // 8-bit compressed displacement tuple/subvector format. This is only
  // defined for NumElts <= 8.
  // NOTE(review): the guard (!srl(NumElts, 4) == 0) admits NumElts up to 15;
  // the !cast of "CD8VT" # NumElts presumably only resolves for the counts
  // that actually have CD8VT* records — confirm against X86InstrFormats.
  CD8VForm CD8TupleForm = !if (!eq (!srl(NumElts, 4), 0),
                               !cast<CD8VForm>("CD8VT" # NumElts), ?);

  // Subregister index for extracting/inserting this vector from a wider one;
  // undefined (?) for the 512-bit (full-width) case.
  SubRegIndex SubRegIdx = !if (!eq (Size, 128), sub_xmm,
                          !if (!eq (Size, 256), sub_ymm, ?));

  // Execution domain used by ExecutionDomainFix: FP single/double or integer.
  Domain ExeDomain = !if (!eq (EltTypeName, "f32"), SSEPackedSingle,
                     !if (!eq (EltTypeName, "f64"), SSEPackedDouble,
                     SSEPackedInt));

  // Scalar FP register class matching the element type.
  RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X, FR64X);

  // Convenience dag for an all-zeros vector of this VT (zero-masking).
  dag ImmAllZerosV = (VT immAllZerosV);

  // Suffix appended to instruction names to pick the vector length variant.
  string ZSuffix = !if (!eq (Size, 128), "Z128",
                   !if (!eq (Size, 256), "Z256", "Z"));
}

// 512-bit vector type infos.
def v64i8_info  : X86VectorVTInfo<64,  i8, VR512, "b">;
def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">;
def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
def v8i64_info  : X86VectorVTInfo<8,  i64, VR512, "q">;
def v16f32_info : X86VectorVTInfo<16, f32, VR512, "ps">;
def v8f64_info  : X86VectorVTInfo<8,  f64, VR512, "pd">;

// "x" in v32i8x_info means RC = VR256X
def v32i8x_info  : X86VectorVTInfo<32,  i8, VR256X, "b">;
def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
def v8i32x_info  : X86VectorVTInfo<8,  i32, VR256X, "d">;
def v4i64x_info  : X86VectorVTInfo<4,  i64, VR256X, "q">;
def v8f32x_info  : X86VectorVTInfo<8,  f32, VR256X, "ps">;
def v4f64x_info  : X86VectorVTInfo<4,  f64, VR256X, "pd">;

// 128-bit vector type infos (RC = VR128X).
def v16i8x_info  : X86VectorVTInfo<16,  i8, VR128X, "b">;
def v8i16x_info  : X86VectorVTInfo<8,  i16, VR128X, "w">;
def v4i32x_info  : X86VectorVTInfo<4,  i32, VR128X, "d">;
def v2i64x_info  : X86VectorVTInfo<2,  i64, VR128X, "q">;
def v4f32x_info  : X86VectorVTInfo<4,  f32, VR128X, "ps">;
def v2f64x_info  : X86VectorVTInfo<2,  f64, VR128X, "pd">;

// We map scalar types to the smallest (128-bit) vector type
// with the appropriate element type. This allows to use the same masking logic.
// Scalar type infos (NumElts = 1). Note the integer scalars live in GPRs
// (GR32/GR64) while the FP scalars use the 128-bit vector class VR128X.
def i32x_info    : X86VectorVTInfo<1,  i32, GR32, "si">;
def i64x_info    : X86VectorVTInfo<1,  i64, GR64, "sq">;
def f32x_info    : X86VectorVTInfo<1,  f32, VR128X, "ss">;
def f64x_info    : X86VectorVTInfo<1,  f64, VR128X, "sd">;

// Bundles the 512/256/128-bit instantiations of one element type so a single
// multiclass can generate all three vector-length (VL) variants.
class AVX512VLVectorVTInfo<X86VectorVTInfo i512, X86VectorVTInfo i256,
                           X86VectorVTInfo i128> {
  X86VectorVTInfo info512 = i512;
  X86VectorVTInfo info256 = i256;
  X86VectorVTInfo info128 = i128;
}

def avx512vl_i8_info  : AVX512VLVectorVTInfo<v64i8_info, v32i8x_info,
                                             v16i8x_info>;
def avx512vl_i16_info : AVX512VLVectorVTInfo<v32i16_info, v16i16x_info,
                                             v8i16x_info>;
def avx512vl_i32_info : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info,
                                             v4i32x_info>;
def avx512vl_i64_info : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info,
                                             v2i64x_info>;
def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info,
                                             v4f32x_info>;
def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info,
                                             v2f64x_info>;

// Groups the mask register class, its write-mask variant, and the mask value
// type for instructions that operate directly on mask (k) registers.
class X86KVectorVTInfo<RegisterClass _krc, RegisterClass _krcwm,
                       ValueType _vt> {
  RegisterClass KRC = _krc;
  RegisterClass KRCWM = _krcwm;
  ValueType KVT = _vt;
}

def v1i1_info : X86KVectorVTInfo<VK1, VK1WM, v1i1>;
def v2i1_info : X86KVectorVTInfo<VK2, VK2WM, v2i1>;
def v4i1_info : X86KVectorVTInfo<VK4, VK4WM, v4i1>;
def v8i1_info : X86KVectorVTInfo<VK8, VK8WM, v8i1>;
def v16i1_info : X86KVectorVTInfo<VK16, VK16WM, v16i1>;
def v32i1_info : X86KVectorVTInfo<VK32, VK32WM, v32i1>;
def v64i1_info : X86KVectorVTInfo<VK64, VK64WM, v64i1>;

// Used for matching masked operations. Ensures the operation part only has a
// single use.
// vselect restricted by isProfitableToFormMaskedOp — used as the Select
// operator when forming masked vector operations.
def vselect_mask : PatFrag<(ops node:$mask, node:$src1, node:$src2),
                           (vselect node:$mask, node:$src1, node:$src2), [{
  return isProfitableToFormMaskedOp(N);
}]>;

// Scalar analogue of vselect_mask, built on X86selects.
def X86selects_mask : PatFrag<(ops node:$mask, node:$src1, node:$src2),
                              (X86selects node:$mask, node:$src1, node:$src2), [{
  return isProfitableToFormMaskedOp(N);
}]>;

// This multiclass generates the masking variants from the non-masking
// variant. It only provides the assembly pieces for the masking variants.
// It assumes custom ISel patterns for masking which can be provided as
// template arguments.
//
// Emits three records: NAME (unmasked), NAME#k (merge-masking, EVEX.K) and
// NAME#kz (zero-masking, EVEX.KZ). MaskingConstraint (e.g. "$src0 = $dst")
// ties the pass-through operand to the destination for the merge-masking form.
multiclass AVX512_maskable_custom<bits<8> O, Format F,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern,
                                  list<dag> MaskingPattern,
                                  list<dag> ZeroMaskingPattern,
                                  string MaskingConstraint = "",
                                  bit IsCommutable = 0,
                                  bit IsKCommutable = 0,
                                  bit IsKZCommutable = IsCommutable> {
  let isCommutable = IsCommutable in
    def NAME: AVX512<O, F, Outs, Ins,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
                                     "$dst, "#IntelSrcAsm#"}",
                       Pattern>;

  // Prefer over VMOV*rrk Pat<>
  let isCommutable = IsKCommutable in
    def NAME#k: AVX512<O, F, Outs, MaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
                                     "$dst {${mask}}, "#IntelSrcAsm#"}",
                       MaskingPattern>,
              EVEX_K {
      // In case of the 3src subclass this is overridden with a let.
      string Constraints = MaskingConstraint;
    }

  // Zero mask does not add any restrictions to commute operands transformation.
  // So, it is Ok to use IsCommutable instead of IsKCommutable.
  let isCommutable = IsKZCommutable in // Prefer over VMOV*rrkz Pat<>
    def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
                                     "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
                       ZeroMaskingPattern>,
              EVEX_KZ;
}


// Common base class of AVX512_maskable and AVX512_maskable_3src.
// Builds the three pattern lists from RHS/MaskingRHS; the zero-masking
// pattern selects between RHS and an all-zeros vector of the result type.
multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS, dag MaskingRHS,
                                  SDPatternOperator Select = vselect_mask,
                                  string MaskingConstraint = "",
                                  bit IsCommutable = 0,
                                  bit IsKCommutable = 0,
                                  bit IsKZCommutable = IsCommutable> :
  AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
                         AttSrcAsm, IntelSrcAsm,
                         [(set _.RC:$dst, RHS)],
                         [(set _.RC:$dst, MaskingRHS)],
                         [(set _.RC:$dst,
                               (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
                         MaskingConstraint, IsCommutable,
                         IsKCommutable, IsKZCommutable>;

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction. In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
// This version uses a separate dag for non-masking and masking.
// Variant of AVX512_maskable where the masked patterns use a different RHS
// (MaskRHS) than the unmasked one; both masked forms go through vselect_mask.
// The merge-masking input list prepends a tied pass-through $src0 and the
// write mask to the caller's Ins.
multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS, dag MaskRHS,
                           bit IsCommutable = 0, bit IsKCommutable = 0,
                           bit IsKZCommutable = IsCommutable> :
   AVX512_maskable_custom<O, F, Outs, Ins,
                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
                          [(set _.RC:$dst, RHS)],
                          [(set _.RC:$dst,
                              (vselect_mask _.KRCWM:$mask, MaskRHS, _.RC:$src0))],
                          [(set _.RC:$dst,
                              (vselect_mask _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))],
                          "$src0 = $dst", IsCommutable, IsKCommutable,
                          IsKZCommutable>;

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction. In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS,
                           bit IsCommutable = 0, bit IsKCommutable = 0,
                           bit IsKZCommutable = IsCommutable,
                           SDPatternOperator Select = vselect_mask> :
   AVX512_maskable_common<O, F, _, Outs, Ins,
                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                          (Select _.KRCWM:$mask, RHS, _.RC:$src0),
                          Select, "$src0 = $dst", IsCommutable, IsKCommutable,
                          IsKZCommutable>;

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the scalar instruction.
// Scalar form: same as AVX512_maskable but selects with X86selects_mask and
// disables all commutability.
multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS> :
   AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm,
                   RHS, 0, 0, 0, X86selects_mask>;

// Similar to AVX512_maskable but in this case one of the source operands
// ($src1) is already tied to $dst so we just use that for the preserved
// vector elements. NOTE that the NonTiedIns (the ins dag) should exclude
// $src1.
//
// MaskOnly = 1 suppresses the unmasked selection pattern (null_frag) while
// still emitting the masked/zero-masked ones.
multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
                                dag Outs, dag NonTiedIns, string OpcodeStr,
                                string AttSrcAsm, string IntelSrcAsm,
                                dag RHS,
                                bit IsCommutable = 0,
                                bit IsKCommutable = 0,
                                SDPatternOperator Select = vselect_mask,
                                bit MaskOnly = 0> :
   AVX512_maskable_common<O, F, _, Outs,
                          !con((ins _.RC:$src1), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
                          !if(MaskOnly, (null_frag), RHS),
                          (Select _.KRCWM:$mask, RHS, _.RC:$src1),
                          Select, "", IsCommutable, IsKCommutable>;

// Similar to AVX512_maskable_3src but in this case the input VT for the tied
// operand differs from the output VT. This requires a bitconvert on
// the preserved vector going into the vselect.
// NOTE: The unmasked pattern is disabled.
// 3-src variant with mismatched input/output VTs: the tied $src1 (of InVT) is
// bitconverted before being used as the merge-masking pass-through for OutVT.
// The unmasked pattern is disabled (null_frag).
multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
                                     X86VectorVTInfo InVT,
                                     dag Outs, dag NonTiedIns, string OpcodeStr,
                                     string AttSrcAsm, string IntelSrcAsm,
                                     dag RHS, bit IsCommutable = 0> :
   AVX512_maskable_common<O, F, OutVT, Outs,
                          !con((ins InVT.RC:$src1), NonTiedIns),
                          !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
                          !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, (null_frag),
                          (vselect_mask InVT.KRCWM:$mask, RHS,
                           (bitconvert InVT.RC:$src1)),
                          vselect_mask, "", IsCommutable>;

// Scalar form of the 3-src masking multiclass; selects with X86selects_mask.
multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                                     dag Outs, dag NonTiedIns, string OpcodeStr,
                                     string AttSrcAsm, string IntelSrcAsm,
                                     dag RHS,
                                     bit IsCommutable = 0,
                                     bit IsKCommutable = 0,
                                     bit MaskOnly = 0> :
   AVX512_maskable_3src<O, F, _, Outs, NonTiedIns, OpcodeStr, AttSrcAsm,
                        IntelSrcAsm, RHS, IsCommutable, IsKCommutable,
                        X86selects_mask, MaskOnly>;

// Assembly-only masking variants: only the unmasked form gets a selection
// pattern; k/kz records exist for the assembler/disassembler.
multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs, dag Ins,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern> :
   AVX512_maskable_custom<O, F, Outs, Ins,
                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
                          "$src0 = $dst">;

// Assembly-only 3-src variant: $src1 is tied, so no extra constraint string.
multiclass AVX512_maskable_3src_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
                                       dag Outs, dag NonTiedIns,
                                       string OpcodeStr,
                                       string AttSrcAsm, string IntelSrcAsm,
                                       list<dag> Pattern> :
   AVX512_maskable_custom<O, F, Outs,
                          !con((ins _.RC:$src1), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
                          "">;

// Instruction with mask that puts result in mask register,
// like "compare" and "vptest"
// Like AVX512_maskable_custom but for instructions whose result is a mask
// register; only unmasked (NAME) and merge-masked (NAME#k) forms exist —
// there is no zero-masking of a mask result.
multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
                                  dag Outs,
                                  dag Ins, dag MaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern,
                                  list<dag> MaskingPattern,
                                  bit IsCommutable = 0> {
  let isCommutable = IsCommutable in {
    def NAME: AVX512<O, F, Outs, Ins,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
                                     "$dst, "#IntelSrcAsm#"}",
                       Pattern>;

    def NAME#k: AVX512<O, F, Outs, MaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
                                     "$dst {${mask}}, "#IntelSrcAsm#"}",
                       MaskingPattern>, EVEX_K;
  }
}

// Wraps AVX512_maskable_custom_cmp, setting the k-register destination from
// the supplied RHS dags.
multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs,
                                  dag Ins, dag MaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS, dag MaskingRHS,
                                  bit IsCommutable = 0> :
   AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
                              AttSrcAsm, IntelSrcAsm,
                              [(set _.KRC:$dst, RHS)],
                              [(set _.KRC:$dst, MaskingRHS)], IsCommutable>;

// Masked compare: the masked pattern ANDs the write mask with RHS_su (a
// single-use variant of RHS supplied by the caller).
multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
                               dag Outs, dag Ins, string OpcodeStr,
                               string AttSrcAsm, string IntelSrcAsm,
                               dag RHS, dag RHS_su, bit IsCommutable = 0> :
   AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
                              !con((ins _.KRCWM:$mask), Ins),
                              OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                              (and _.KRCWM:$mask, RHS_su), IsCommutable>;

// Used by conversion instructions.
// Conversion form: the caller supplies fully-formed RHS dags for all three
// variants (no implicit vselect is inserted here).
multiclass AVX512_maskable_cvt<bits<8> O, Format F, X86VectorVTInfo _,
                               dag Outs,
                               dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                               string OpcodeStr,
                               string AttSrcAsm, string IntelSrcAsm,
                               dag RHS, dag MaskingRHS, dag ZeroMaskingRHS> :
   AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
                          AttSrcAsm, IntelSrcAsm,
                          [(set _.RC:$dst, RHS)],
                          [(set _.RC:$dst, MaskingRHS)],
                          [(set _.RC:$dst, ZeroMaskingRHS)],
                          "$src0 = $dst">;

// FMA form: $src1 is tied (like 3src) and serves as the merge-masking
// pass-through; MaskingRHS is selected through vselect_mask for both the
// merge- and zero-masked variants.
multiclass AVX512_maskable_fma<bits<8> O, Format F, X86VectorVTInfo _,
                               dag Outs, dag NonTiedIns, string OpcodeStr,
                               string AttSrcAsm, string IntelSrcAsm,
                               dag RHS, dag MaskingRHS, bit IsCommutable,
                               bit IsKCommutable> :
   AVX512_maskable_custom<O, F, Outs,
                          !con((ins _.RC:$src1), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
                          [(set _.RC:$dst, RHS)],
                          [(set _.RC:$dst,
                            (vselect_mask _.KRCWM:$mask, MaskingRHS, _.RC:$src1))],
                          [(set _.RC:$dst,
                            (vselect_mask _.KRCWM:$mask, MaskingRHS, _.ImmAllZerosV))],
                          "", IsCommutable, IsKCommutable>;

// Alias instruction that maps zero vector to pxor / xorp* for AVX-512.
// This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
// swizzled by ExecutionDomainFix to pxor.
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
               [(set VR512:$dst, (v16i32 immAllZerosV))]>;
def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
               [(set VR512:$dst, (v16i32 immAllOnesV))]>;
}

// Map the remaining 512-bit all-zeros VTs onto the v16i32 pseudo above.
let Predicates = [HasAVX512] in {
def : Pat<(v64i8 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v32i16 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v8i64 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v16f32 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>;
}

// Alias instructions that allow VPTERNLOG to be used with a mask to create
// a mix of all ones and all zeros elements. This is done this way to force
// the same register to be used as input for all three sources.
let isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteVecALU] in {
def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst),
                                (ins VK16WM:$mask), "",
                           [(set VR512:$dst, (vselect (v16i1 VK16WM:$mask),
                                                      (v16i32 immAllOnesV),
                                                      (v16i32 immAllZerosV)))]>;
def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
                                (ins VK8WM:$mask), "",
                           [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
                                                      (v8i64 immAllOnesV),
                                                      (v8i64 immAllZerosV)))]>;
}

// 128/256-bit zeroing pseudos, mirroring AVX512_512_SET0 above.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
               [(set VR128X:$dst, (v4i32 immAllZerosV))]>;
def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
               [(set VR256X:$dst, (v8i32 immAllZerosV))]>;
}

// Map the remaining 128/256-bit all-zeros VTs onto the pseudos above.
let Predicates = [HasAVX512] in {
def : Pat<(v8i16 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v16i8 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v2i64 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v4f32 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v2f64 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v32i8 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v16i16 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v4i64 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v8f32 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v4f64 immAllZerosV), (AVX512_256_SET0)>;
}

// Alias instructions that map fld0 to xorps for sse or vxorps for avx.
// This is expanded by ExpandPostRAPseudos.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in {
  def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
                          [(set FR32X:$dst, fp32imm0)]>;
  def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
                          [(set FR64X:$dst, fp64imm0)]>;
  def AVX512_FsFLD0F128 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
                            [(set VR128X:$dst, fp128imm0)]>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - VECTOR INSERT
//

// Supports two different pattern operators for mask and unmasked ops. Allows
// null_frag to be passed for one.
// Generates register ("rr") and memory ("rm") forms of a VINSERT instruction
// that inserts a From-sized subvector into a To-sized vector at an immediate
// lane index. vinsert_insert drives the unmasked pattern; vinsert_for_mask
// drives the masked ones (either may be null_frag).
multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From,
                                  X86VectorVTInfo To,
                                  SDPatternOperator vinsert_insert,
                                  SDPatternOperator vinsert_for_mask,
                                  X86FoldableSchedWrite sched> {
  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    defm rr : AVX512_maskable_split<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
                   (ins To.RC:$src1, From.RC:$src2, u8imm:$src3),
                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
                   "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
                                         (From.VT From.RC:$src2),
                                         (iPTR imm)),
                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
                                           (From.VT From.RC:$src2),
                                           (iPTR imm))>,
                   AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
    let mayLoad = 1 in
    defm rm : AVX512_maskable_split<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
                   (ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3),
                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
                   "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
                                         (From.VT (From.LdFrag addr:$src2)),
                                         (iPTR imm)),
                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
                                           (From.VT (From.LdFrag addr:$src2)),
                                           (iPTR imm))>, AVX512AIi8Base, EVEX_4V,
                   EVEX_CD8<From.EltSize, From.CD8TupleForm>,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Passes the same pattern operator for masked and unmasked ops.
multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From,
                            X86VectorVTInfo To,
                            SDPatternOperator vinsert_insert,
                            X86FoldableSchedWrite sched> :
  vinsert_for_size_split<Opcode, From, To, vinsert_insert, vinsert_insert, sched>;

// Selection patterns mapping a subvector insert of From into To onto an
// already-defined VINSERT instruction (InstrStr # "rr"/"rm"); the immediate
// lane index is recovered via INSERT_get_vinsert_imm.
multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                       X86VectorVTInfo To, PatFrag vinsert_insert,
                       SDNodeXForm INSERT_get_vinsert_imm , list<Predicate> p> {
  let Predicates = p in {
    def : Pat<(vinsert_insert:$ins
                     (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rr")
                     To.RC:$src1, From.RC:$src2,
                     (INSERT_get_vinsert_imm To.RC:$ins)))>;

    def : Pat<(vinsert_insert:$ins
                  (To.VT To.RC:$src1),
                  (From.VT (From.LdFrag addr:$src2)),
                  (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rm")
                  To.RC:$src1, addr:$src2,
                  (INSERT_get_vinsert_imm To.RC:$ins)))>;
  }
}

// Instantiates all VINSERT size/width combinations for one int/fp type pair:
// 32x4 and 64x4 variants unconditionally (by predicate), 64x2 and 32x8
// variants only for masking when DQI is available.
multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
                            ValueType EltVT64, int Opcode256,
                            X86FoldableSchedWrite sched> {

  let Predicates = [HasVLX] in
    defm NAME # "32x4Z256" : vinsert_for_size<Opcode128,
                                   X86VectorVTInfo< 4, EltVT32, VR128X>,
                                   X86VectorVTInfo< 8, EltVT32, VR256X>,
                                   vinsert128_insert, sched>, EVEX_V256;

  defm NAME # "32x4Z" : vinsert_for_size<Opcode128,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 vinsert128_insert, sched>, EVEX_V512;

  defm NAME # "64x4Z" : vinsert_for_size<Opcode256,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 vinsert256_insert, sched>, VEX_W, EVEX_V512;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasVLX, HasDQI] in
    defm NAME # "64x2Z256" : vinsert_for_size_split<Opcode128,
                                   X86VectorVTInfo< 2, EltVT64, VR128X>,
                                   X86VectorVTInfo< 4, EltVT64, VR256X>,
                                   null_frag, vinsert128_insert, sched>,
                                   VEX_W1X, EVEX_V256;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z" : vinsert_for_size_split<Opcode128,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 null_frag, vinsert128_insert, sched>,
                                 VEX_W, EVEX_V512;

    defm NAME # "32x8Z" : vinsert_for_size_split<Opcode256,
                                   X86VectorVTInfo< 8, EltVT32, VR256X>,
                                   X86VectorVTInfo<16, EltVT32, VR512>,
                                   null_frag, vinsert256_insert, sched>,
                                   EVEX_V512;
  }
}

// FIXME: Is there a better scheduler class for VINSERTF/VINSERTI?
defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a, WriteFShuffle256>;
defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a, WriteShuffle256>;

// Codegen pattern with the alternative types,
// Even with AVX512DQ we'll still use these for unmasked operations.
defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;

defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;

defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;

// Codegen pattern with the alternative types insert VEC128 into VEC256
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
// Codegen pattern with the alternative types insert VEC128 into VEC512
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
               vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
// Codegen pattern with the alternative types insert VEC256 into VEC512
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;


// Patterns that match a masked insert where the vselect operates on a VT
// (Cast) different from the instruction's natural To VT: the insert result is
// bitconverted to Cast.VT before the vselect_mask, and the pass-through comes
// from a Cast-typed $src0 (merge) or all-zeros (zero-masking). Matches the
// "rrk"/"rmk"/"rrkz"/"rmkz" forms of InstrStr.
multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                 X86VectorVTInfo To, X86VectorVTInfo Cast,
                                 PatFrag vinsert_insert,
                                 SDNodeXForm INSERT_get_vinsert_imm,
                                 list<Predicate> p> {
let Predicates = p in {
  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT From.RC:$src2),
                                                 (iPTR imm))),
                           Cast.RC:$src0)),
            (!cast<Instruction>(InstrStr#"rrk")
             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT
                                                  (bitconvert
                                                   (From.LdFrag addr:$src2))),
                                                 (iPTR imm))),
                           Cast.RC:$src0)),
            (!cast<Instruction>(InstrStr#"rmk")
             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;

  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT From.RC:$src2),
                                                 (iPTR imm))),
                           Cast.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#"rrkz")
             Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT (From.LdFrag addr:$src2)),
                                                 (iPTR imm))),
                           Cast.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#"rmkz")
             Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
}
}

defm : vinsert_for_mask_cast<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
                             v8f32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4f32x_info, v8f32x_info,
                             v4f64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;

defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4i32x_info, v8i32x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v8i16x_info, v16i16x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v16i8x_info, v32i8x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;

defm : vinsert_for_mask_cast<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
                             v16f32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z", v4f32x_info, v16f32_info,
                             v8f64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;

defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v4i32x_info, v16i32_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v8i16x_info, v32i16_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v16i8x_info, v64i8_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;

defm : vinsert_for_mask_cast<"VINSERTF32x8Z", v4f64x_info, v8f64_info,
                             v16f32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
                             v8f64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;

defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v4i64x_info, v8i64_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v16i16x_info, v32i16_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v32i8x_info, v64i8_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;

// vinsertps - insert f32 to XMM
let ExeDomain = SSEPackedSingle in {
let isCommutable = 1 in
def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
      (ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, timm:$src3))]>,
      EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
      (ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst, (X86insertps VR128X:$src1,
                          (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
                          timm:$src3))]>,
      EVEX_4V, EVEX_CD8<32, CD8VT1>,
      Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
}

//===----------------------------------------------------------------------===//
// AVX-512 VECTOR EXTRACT
//---

// Supports two different pattern operators for mask and unmasked ops. Allows
// null_frag to be passed for one.
multiclass vextract_for_size_split<int Opcode,
                                   X86VectorVTInfo From, X86VectorVTInfo To,
                                   SDPatternOperator vextract_extract,
                                   SDPatternOperator vextract_for_mask,
                                   SchedWrite SchedRR, SchedWrite SchedMR> {

  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    // Register form: maskable (rr/rrk/rrkz) via AVX512_maskable_split.
    defm rr : AVX512_maskable_split<Opcode, MRMDestReg, To, (outs To.RC:$dst),
                (ins From.RC:$src1, u8imm:$idx),
                "vextract" # To.EltTypeName # "x" # To.NumElts,
                "$idx, $src1", "$src1, $idx",
                (vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm)),
                (vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm))>,
                AVX512AIi8Base, EVEX, Sched<[SchedRR]>;

    // Unmasked extract-to-memory form.
    def mr : AVX512AIi8<Opcode, MRMDestMem, (outs),
                    (ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx),
                    "vextract" # To.EltTypeName # "x" # To.NumElts #
                        "\t{$idx, $src1, $dst|$dst, $src1, $idx}",
                    [(store (To.VT (vextract_extract:$idx
                                    (From.VT From.RC:$src1), (iPTR imm))),
                             addr:$dst)]>, EVEX,
                    Sched<[SchedMR]>;

    // Masked extract-to-memory form; assembly/disassembly only (no pattern).
    let mayStore = 1, hasSideEffects = 0 in
    def mrk : AVX512AIi8<Opcode, MRMDestMem, (outs),
                         (ins To.MemOp:$dst, To.KRCWM:$mask,
                              From.RC:$src1, u8imm:$idx),
                         "vextract" # To.EltTypeName # "x" # To.NumElts #
                              "\t{$idx, $src1, $dst {${mask}}|"
                              "$dst {${mask}}, $src1, $idx}", []>,
                         EVEX_K, EVEX, Sched<[SchedMR]>, NotMemoryFoldable;
  }
}

// Passes the same pattern operator for masked and unmasked ops.
multiclass vextract_for_size<int Opcode, X86VectorVTInfo From,
                             X86VectorVTInfo To,
                             SDPatternOperator vextract_extract,
                             SchedWrite SchedRR, SchedWrite SchedMR> :
  vextract_for_size_split<Opcode, From, To, vextract_extract, vextract_extract, SchedRR, SchedMR>;

// Codegen pattern for the alternative types
// (reg-reg extract, and extract folded into a store).
multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                X86VectorVTInfo To, PatFrag vextract_extract,
                SDNodeXForm EXTRACT_get_vextract_imm, list<Predicate> p> {
  let Predicates = p in {
     def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)),
               (To.VT (!cast<Instruction>(InstrStr#"rr")
                          From.RC:$src1,
                          (EXTRACT_get_vextract_imm To.RC:$ext)))>;
     def : Pat<(store (To.VT (vextract_extract:$ext (From.VT From.RC:$src1),
                              (iPTR imm))), addr:$dst),
               (!cast<Instruction>(InstrStr#"mr") addr:$dst, From.RC:$src1,
                (EXTRACT_get_vextract_imm To.RC:$ext))>;
  }
}

// Instantiates all the VEXTRACT variants (32x4/64x4 baseline, plus the
// DQI-only 64x2/32x8 forms) for one 32-bit/64-bit element-type pair.
multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
                             ValueType EltVT64, int Opcode256,
                             SchedWrite SchedRR, SchedWrite SchedMR> {
  let Predicates = [HasAVX512] in {
    defm NAME # "32x4Z" : vextract_for_size<Opcode128,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 vextract128_extract, SchedRR, SchedMR>,
                                     EVEX_V512, EVEX_CD8<32, CD8VT4>;
    defm NAME # "64x4Z" : vextract_for_size<Opcode256,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 vextract256_extract, SchedRR, SchedMR>,
                                      VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
  }
  let Predicates = [HasVLX] in
    defm NAME # "32x4Z256" : vextract_for_size<Opcode128,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 vextract128_extract, SchedRR, SchedMR>,
                                      EVEX_V256, EVEX_CD8<32, CD8VT4>;

  // Even with DQI we'd like to only use these instructions for masking.
  // (null_frag disables the unmasked pattern.)
  let Predicates = [HasVLX, HasDQI] in
    defm NAME # "64x2Z256" : vextract_for_size_split<Opcode128,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 null_frag, vextract128_extract, SchedRR, SchedMR>,
                                     VEX_W1X, EVEX_V256, EVEX_CD8<64, CD8VT2>;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z" : vextract_for_size_split<Opcode128,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 null_frag, vextract128_extract, SchedRR, SchedMR>,
                                     VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
    defm NAME # "32x8Z" : vextract_for_size_split<Opcode256,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 null_frag, vextract256_extract, SchedRR, SchedMR>,
                                     EVEX_V512, EVEX_CD8<32, CD8VT8>;
  }
}

// TODO - replace WriteFStore/WriteVecStore with X86SchedWriteMoveLSWidths types.
defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b, WriteFShuffle256, WriteFStore>;
defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b, WriteShuffle256, WriteVecStore>;

// extract_subvector codegen patterns with the alternative types.
// Even with AVX512DQ we'll still use these for unmasked operations.
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;

defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;

defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;

// Codegen pattern with the alternative types extract VEC128 from VEC256
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;

// Codegen pattern with the alternative types extract VEC128 from VEC512
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
                 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
                 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
// Codegen pattern with the alternative types extract VEC256 from VEC512
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
                 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
                 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;


// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
// Without VLX we fall back to the AVX VEXTRACT*128 forms on the low YMM.
let Predicates = [NoVLX] in {
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
          (v2i64 (VEXTRACTI128rr
                  (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
          (v2f64 (VEXTRACTF128rr
                  (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
          (v4i32 (VEXTRACTI128rr
                  (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
          (v4f32 (VEXTRACTF128rr
                  (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
          (v8i16 (VEXTRACTI128rr
                  (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
          (v16i8 (VEXTRACTI128rr
                  (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
}

// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
// With VLX, use the 256-bit VEXTRACT*32x4 forms on the low YMM so the
// result can later be compressed to a VEX encoding.
let Predicates = [HasVLX] in {
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
          (v2i64 (VEXTRACTI32x4Z256rr
                  (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
          (v2f64 (VEXTRACTF32x4Z256rr
                  (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
          (v4i32 (VEXTRACTI32x4Z256rr
                  (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
          (v4f32 (VEXTRACTF32x4Z256rr
                  (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
          (v8i16 (VEXTRACTI32x4Z256rr
                  (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
          (v16i8 (VEXTRACTI32x4Z256rr
                  (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
}


// Additional patterns for handling a bitcast between the vselect and the
// extract_subvector.
// Handles a masked extract where a bitcast sits between the vselect and the
// extract_subvector: the mask/select operate on Cast's type while the
// underlying instruction extracts To's type from From.
// NOTE: the passthru operand must be bound as Cast.RC:$src0 (not To.RC):
// the vselect result type is Cast.VT, and the output pattern already
// references Cast.RC:$src0 — this mirrors the Cast.ImmAllZerosV zero form.
multiclass vextract_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                  X86VectorVTInfo To, X86VectorVTInfo Cast,
                                  PatFrag vextract_extract,
                                  SDNodeXForm EXTRACT_get_vextract_imm,
                                  list<Predicate> p> {
let Predicates = p in {
  def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask,
                                   (bitconvert
                                    (To.VT (vextract_extract:$ext
                                            (From.VT From.RC:$src), (iPTR imm)))),
                                   Cast.RC:$src0)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
                      Cast.RC:$src0, Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;

  def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask,
                                   (bitconvert
                                    (To.VT (vextract_extract:$ext
                                            (From.VT From.RC:$src), (iPTR imm)))),
                                   Cast.ImmAllZerosV)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
                      Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;
}
}

defm : vextract_for_mask_cast<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
                              v4f32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z256", v8f32x_info, v4f32x_info,
                              v2f64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;

defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v8i32x_info, v4i32x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v16i16x_info, v8i16x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v32i8x_info, v16i8x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;

defm : vextract_for_mask_cast<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
                              v4f32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z", v16f32_info, v4f32x_info,
                              v2f64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;

defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v16i32_info, v4i32x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v32i16_info, v8i16x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v64i8_info, v16i8x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;

defm : vextract_for_mask_cast<"VEXTRACTF32x8Z", v8f64_info, v4f64x_info,
                              v8f32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
                              v4f64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;

defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v8i64_info, v4i64x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v32i16_info, v16i16x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v64i8_info, v32i8x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;

// vextractps - extract 32 bits from XMM
def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
                      (ins VR128X:$src1, u8imm:$src2),
                      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
                      EVEX, VEX_WIG, Sched<[WriteVecExtract]>;

def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
                      (ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
                      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
                              addr:$dst)]>,
                      EVEX, VEX_WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecExtractSt]>;

//===---------------------------------------------------------------------===//
// AVX-512 BROADCAST
//---
// broadcast with a scalar argument.
// Selects the register-form broadcast instruction for an X86VBroadcast of a
// scalar FP register (SrcInfo.FRC), for the plain, merge-masked and
// zero-masked cases. The scalar is first copied into the vector class.
multiclass avx512_broadcast_scalar<bits<8> opc, string OpcodeStr,
                                   string Name,
                                   X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo> {
  def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#rr)
             (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
  def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask,
                                       (X86VBroadcast SrcInfo.FRC:$src),
                                       DestInfo.RC:$src0)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#rrk)
             DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
             (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
  def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask,
                                       (X86VBroadcast SrcInfo.FRC:$src),
                                       DestInfo.ImmAllZerosV)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#rrkz)
             DestInfo.KRCWM:$mask, (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
}

// Split version to allow mask and broadcast node to be different types. This
// helps support the 32x2 broadcasts.
multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
                                     string Name,
                                     SchedWrite SchedRR, SchedWrite SchedRM,
                                     X86VectorVTInfo MaskInfo,
                                     X86VectorVTInfo DestInfo,
                                     X86VectorVTInfo SrcInfo,
                                     bit IsConvertibleToThreeAddress,
                                     SDPatternOperator UnmaskedOp = X86VBroadcast,
                                     SDPatternOperator UnmaskedBcastOp = SrcInfo.BroadcastLdFrag> {
  // Register-source broadcast. Only the unmasked form uses the overridable
  // UnmaskedOp; the masked forms always match X86VBroadcast.
  let hasSideEffects = 0 in
  def rr : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst), (ins SrcInfo.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set MaskInfo.RC:$dst,
                      (MaskInfo.VT
                       (bitconvert
                        (DestInfo.VT
                         (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))))],
                    DestInfo.ExeDomain>, T8PD, EVEX, Sched<[SchedRR]>;
  def rrkz : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
                      (ins MaskInfo.KRCWM:$mask, SrcInfo.RC:$src),
                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
                                 "${dst} {${mask}} {z}, $src}"),
                      [(set MaskInfo.RC:$dst,
                        (vselect_mask MaskInfo.KRCWM:$mask,
                                      (MaskInfo.VT
                                       (bitconvert
                                        (DestInfo.VT
                                         (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
                                      MaskInfo.ImmAllZerosV))],
                      DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ, Sched<[SchedRR]>;
  let Constraints = "$src0 = $dst" in
  def rrk : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
                     (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
                          SrcInfo.RC:$src),
                     !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
                                "${dst} {${mask}}, $src}"),
                     [(set MaskInfo.RC:$dst,
                       (vselect_mask MaskInfo.KRCWM:$mask,
                                     (MaskInfo.VT
                                      (bitconvert
                                       (DestInfo.VT
                                        (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
                                     MaskInfo.RC:$src0))],
                     DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K, Sched<[SchedRR]>;

  // Memory-source broadcast of a single scalar element.
  let hasSideEffects = 0, mayLoad = 1 in
  def rm : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
                    (ins SrcInfo.ScalarMemOp:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set MaskInfo.RC:$dst,
                      (MaskInfo.VT
                       (bitconvert
                        (DestInfo.VT
                         (UnmaskedBcastOp addr:$src)))))],
                    DestInfo.ExeDomain>, T8PD, EVEX,
                    EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;

  def rmkz : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
                      (ins MaskInfo.KRCWM:$mask, SrcInfo.ScalarMemOp:$src),
                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
                                 "${dst} {${mask}} {z}, $src}"),
                      [(set MaskInfo.RC:$dst,
                        (vselect_mask MaskInfo.KRCWM:$mask,
                                      (MaskInfo.VT
                                       (bitconvert
                                        (DestInfo.VT
                                         (SrcInfo.BroadcastLdFrag addr:$src)))),
                                      MaskInfo.ImmAllZerosV))],
                      DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ,
                      EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;

  let Constraints = "$src0 = $dst",
      isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
  def rmk : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
                     (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
                          SrcInfo.ScalarMemOp:$src),
                     !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
                                "${dst} {${mask}}, $src}"),
                     [(set MaskInfo.RC:$dst,
                       (vselect_mask MaskInfo.KRCWM:$mask,
                                     (MaskInfo.VT
                                      (bitconvert
                                       (DestInfo.VT
                                        (SrcInfo.BroadcastLdFrag addr:$src)))),
                                     MaskInfo.RC:$src0))],
                     DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K,
                     EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
}

// Helper class to force mask and broadcast result to same type.
multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr, string Name,
                               SchedWrite SchedRR, SchedWrite SchedRM,
                               X86VectorVTInfo DestInfo,
                               X86VectorVTInfo SrcInfo,
                               bit IsConvertibleToThreeAddress> :
  avx512_broadcast_rm_split<opc, OpcodeStr, Name, SchedRR, SchedRM,
                            DestInfo, DestInfo, SrcInfo,
                            IsConvertibleToThreeAddress>;

// FP 64-bit-element broadcast: 512-bit always, 256-bit with VLX.
// (No 128-bit variant — broadcasting f64 into a single v2f64 is a movddup.)
multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                 WriteFShuffle256Ld, _.info512, _.info128, 1>,
             avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512,
                                     _.info128>,
                                     EVEX_V512;
  }

  let Predicates = [HasVLX] in {
    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                    WriteFShuffle256Ld, _.info256, _.info128, 1>,
                avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256,
                                        _.info128>,
                                        EVEX_V256;
  }
}

// FP 32-bit-element broadcast: 512-bit always; 256- and 128-bit with VLX.
multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                 WriteFShuffle256Ld, _.info512, _.info128, 1>,
             avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512,
                                     _.info128>,
                                     EVEX_V512;
  }

  let Predicates = [HasVLX] in {
    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                    WriteFShuffle256Ld, _.info256, _.info128, 1>,
                avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256,
                                        _.info128>,
                                        EVEX_V256;
    defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                    WriteFShuffle256Ld, _.info128, _.info128, 1>,
                avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info128,
                                        _.info128>,
                                        EVEX_V128;
  }
}
defm VBROADCASTSS : avx512_fp_broadcast_ss<0x18, "vbroadcastss",
                                           avx512vl_f32_info>;
defm VBROADCASTSD : avx512_fp_broadcast_sd<0x19, "vbroadcastsd",
                                           avx512vl_f64_info>, VEX_W1X;

// Broadcast from a GPR (vpbroadcastd/q with a register source).
multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR,
                                    X86VectorVTInfo _, SDPatternOperator OpNode,
                                    RegisterClass SrcRC> {
  // Fold with a mask even if it has multiple uses since it is cheap.
  let ExeDomain = _.ExeDomain in
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins SrcRC:$src),
                            "vpbroadcast"#_.Suffix, "$src", "$src",
                            (_.VT (OpNode SrcRC:$src)), /*IsCommutable*/0,
                            /*IsKCommutable*/0, /*IsKZCommutable*/0, vselect>,
                            T8PD, EVEX, Sched<[SchedRR]>;
}

// Byte/word GPR broadcast. The instruction always encodes a GR32 source;
// patterns widen the GR8/GR16 operand with INSERT_SUBREG into an undef i32.
multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite SchedRR,
                                      X86VectorVTInfo _, SDPatternOperator OpNode,
                                      RegisterClass SrcRC, SubRegIndex Subreg> {
  let hasSideEffects = 0, ExeDomain = _.ExeDomain in
  defm rr : AVX512_maskable_custom<opc, MRMSrcReg,
                                   (outs _.RC:$dst), (ins GR32:$src),
                                   !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
                                   !con((ins _.KRCWM:$mask), (ins GR32:$src)),
                                   "vpbroadcast"#_.Suffix, "$src", "$src", [], [], [],
                                   "$src0 = $dst">, T8PD, EVEX, Sched<[SchedRR]>;

  def : Pat <(_.VT (OpNode SrcRC:$src)),
             (!cast<Instruction>(Name#rr)
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;

  // Fold with a mask even if it has multiple uses since it is cheap.
  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
             (!cast<Instruction>(Name#rrk) _.RC:$src0, _.KRCWM:$mask,
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;

  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV),
             (!cast<Instruction>(Name#rrkz) _.KRCWM:$mask,
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
}

multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name,
                                         AVX512VLVectorVTInfo _, SDPatternOperator OpNode,
                                         RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, WriteShuffle256, _.info512,
                                        OpNode, SrcRC, Subreg>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, WriteShuffle256,
                                           _.info256, OpNode, SrcRC, Subreg>, EVEX_V256;
    defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, WriteShuffle,
                                           _.info128, OpNode, SrcRC, Subreg>, EVEX_V128;
  }
}

multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
                                       SDPatternOperator OpNode,
                                       RegisterClass SrcRC, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info512, OpNode,
                                      SrcRC>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info256, OpNode,
                                         SrcRC>, EVEX_V256;
    defm Z128 : avx512_int_broadcast_reg<opc, WriteShuffle, _.info128, OpNode,
                                         SrcRC>, EVEX_V128;
  }
}

defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr",
                       avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>;
defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr",
                       avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit,
                       HasBWI>;
defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
                                                 X86VBroadcast, GR32, HasAVX512>;
defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
                                                 X86VBroadcast, GR64, HasAVX512>, VEX_W;

// Integer broadcast from vector-register/memory sources.
multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
                                      AVX512VLVectorVTInfo _, Predicate prd,
                                      bit IsConvertibleToThreeAddress> {
  let Predicates = [prd] in {
    defm Z : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256,
                                 WriteShuffle256Ld, _.info512, _.info128,
                                 IsConvertibleToThreeAddress>,
                                 EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256,
                                    WriteShuffle256Ld, _.info256, _.info128,
                                    IsConvertibleToThreeAddress>,
                                    EVEX_V256;
    defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle,
                                    WriteShuffleXLd, _.info128, _.info128,
                                    IsConvertibleToThreeAddress>,
                                    EVEX_V128;
  }
}

defm VPBROADCASTB : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
                                           avx512vl_i8_info, HasBWI, 0>;
defm VPBROADCASTW : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
                                           avx512vl_i16_info, HasBWI, 0>;
defm VPBROADCASTD : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
                                           avx512vl_i32_info, HasAVX512, 1>;
defm VPBROADCASTQ : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
                                           avx512vl_i64_info, HasAVX512, 1>, VEX_W1X;

// Broadcast a whole subvector loaded from memory.
multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
                                      X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                           (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
                           (_Dst.VT (X86SubVBroadcast
                             (_Src.VT (_Src.LdFrag addr:$src))))>,
                           Sched<[SchedWriteShuffle.YMM.Folded]>,
                           AVX5128IBase, EVEX;
}

// This should be used for the AVX512DQ broadcast instructions. It disables
// the unmasked patterns so that we only use the DQ instructions when masking
// is requested.
multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
                                         X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
  // null_frag for the unmasked pattern: the DQ form is only selected when a
  // mask is present; unmasked cases use the non-DQ instructions instead.
  let hasSideEffects = 0, mayLoad = 1 in
  defm rm : AVX512_maskable_split<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                           (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
                           (null_frag),
                           (_Dst.VT (X86SubVBroadcast
                             (_Src.VT (_Src.LdFrag addr:$src))))>,
                           Sched<[SchedWriteShuffle.YMM.Folded]>,
                           AVX5128IBase, EVEX;
}

//===----------------------------------------------------------------------===//
// AVX-512 BROADCAST SUBVECTORS
//

defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
                       v16i32_info, v4i32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
                       v16f32_info, v4f32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
                       v8i64_info, v4i64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT4>;
defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
                       v8f64_info, v4f64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT4>;

let Predicates = [HasAVX512] in {
// 256-bit subvector broadcasts with other element types map onto the 64x4
// instructions (same memory behavior, different nominal element size).
def : Pat<(v16f32 (X86SubVBroadcast (loadv8f32 addr:$src))),
          (VBROADCASTF64X4rm addr:$src)>;
def : Pat<(v16i32 (X86SubVBroadcast (loadv8i32 addr:$src))),
          (VBROADCASTI64X4rm addr:$src)>;
def : Pat<(v32i16 (X86SubVBroadcast (loadv16i16 addr:$src))),
          (VBROADCASTI64X4rm addr:$src)>;
def : Pat<(v64i8 (X86SubVBroadcast (loadv32i8 addr:$src))),
          (VBROADCASTI64X4rm addr:$src)>;

// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
def : Pat<(v8f64 (X86SubVBroadcast (v4f64 VR256X:$src))),
          (VINSERTF64x4Zrr (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v4f64 VR256X:$src), 1)>;
def : Pat<(v16f32 (X86SubVBroadcast (v8f32 VR256X:$src))),
          (VINSERTF64x4Zrr (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v8f32 VR256X:$src), 1)>;
def : Pat<(v8i64 (X86SubVBroadcast (v4i64 VR256X:$src))),
          (VINSERTI64x4Zrr (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v4i64 VR256X:$src), 1)>;
def : Pat<(v16i32 (X86SubVBroadcast (v8i32 VR256X:$src))),
          (VINSERTI64x4Zrr (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v8i32 VR256X:$src), 1)>;
def : Pat<(v32i16 (X86SubVBroadcast (v16i16 VR256X:$src))),
          (VINSERTI64x4Zrr (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v16i16 VR256X:$src), 1)>;
def : Pat<(v64i8 (X86SubVBroadcast (v32i8 VR256X:$src))),
          (VINSERTI64x4Zrr (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v32i8 VR256X:$src), 1)>;

// 128-bit subvector broadcasts with 64-bit (or narrower) elements map onto
// the 32x4 instructions.
def : Pat<(v8f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
          (VBROADCASTF32X4rm addr:$src)>;
def : Pat<(v8i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
          (VBROADCASTI32X4rm addr:$src)>;
def : Pat<(v32i16 (X86SubVBroadcast (loadv8i16 addr:$src))),
          (VBROADCASTI32X4rm addr:$src)>;
def : Pat<(v64i8 (X86SubVBroadcast (loadv16i8 addr:$src))),
          (VBROADCASTI32X4rm addr:$src)>;

// Patterns for selects of bitcasted operations.
1499def : Pat<(vselect_mask VK16WM:$mask, 1500 (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))), 1501 (v16f32 immAllZerosV)), 1502 (VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>; 1503def : Pat<(vselect_mask VK16WM:$mask, 1504 (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))), 1505 VR512:$src0), 1506 (VBROADCASTF32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>; 1507def : Pat<(vselect_mask VK16WM:$mask, 1508 (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))), 1509 (v16i32 immAllZerosV)), 1510 (VBROADCASTI32X4rmkz VK16WM:$mask, addr:$src)>; 1511def : Pat<(vselect_mask VK16WM:$mask, 1512 (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))), 1513 VR512:$src0), 1514 (VBROADCASTI32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>; 1515 1516def : Pat<(vselect_mask VK8WM:$mask, 1517 (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))), 1518 (v8f64 immAllZerosV)), 1519 (VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>; 1520def : Pat<(vselect_mask VK8WM:$mask, 1521 (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))), 1522 VR512:$src0), 1523 (VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>; 1524def : Pat<(vselect_mask VK8WM:$mask, 1525 (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv8i32 addr:$src)))), 1526 (v8i64 immAllZerosV)), 1527 (VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>; 1528def : Pat<(vselect_mask VK8WM:$mask, 1529 (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv8i32 addr:$src)))), 1530 VR512:$src0), 1531 (VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>; 1532} 1533 1534let Predicates = [HasVLX] in { 1535defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4", 1536 v8i32x_info, v4i32x_info>, 1537 EVEX_V256, EVEX_CD8<32, CD8VT4>; 1538defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4", 1539 v8f32x_info, v4f32x_info>, 1540 EVEX_V256, EVEX_CD8<32, CD8VT4>; 1541 1542def : Pat<(v4f64 (X86SubVBroadcast (loadv2f64 addr:$src))), 1543 (VBROADCASTF32X4Z256rm addr:$src)>; 
// Remaining 256-bit memory-source broadcasts: element types without a
// dedicated instruction reuse the unmasked 32x4 form (bit-identical result).
def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
          (VBROADCASTI32X4Z256rm addr:$src)>;
def : Pat<(v16i16 (X86SubVBroadcast (loadv8i16 addr:$src))),
          (VBROADCASTI32X4Z256rm addr:$src)>;
def : Pat<(v32i8 (X86SubVBroadcast (loadv16i8 addr:$src))),
          (VBROADCASTI32X4Z256rm addr:$src)>;

// Patterns for selects of bitcasted operations.
// Masked 256-bit forms, matched through the bitcast to the select's type;
// the mask width (VK8) matches the 8-element select type.
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
                        (v8f32 immAllZerosV)),
          (VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
                        VR256X:$src0),
          (VBROADCASTF32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
                        (v8i32 immAllZerosV)),
          (VBROADCASTI32X4Z256rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
                        VR256X:$src0),
          (VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;


// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
// Register-source fallback for the 256-bit case: insert the 128-bit source
// into the low half of an IMPLICIT_DEF, then VINSERT*32x4 the same source
// into the high half (index 1).
def : Pat<(v4f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
          (VINSERTF32x4Z256rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
           (v2f64 VR128X:$src), 1)>;
def : Pat<(v8f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
          (VINSERTF32x4Z256rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
           (v4f32 VR128X:$src), 1)>;
def : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
          (VINSERTI32x4Z256rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
           (v2i64 VR128X:$src), 1)>;
def : Pat<(v8i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
          (VINSERTI32x4Z256rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
           (v4i32 VR128X:$src), 1)>;
def : Pat<(v16i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
          (VINSERTI32x4Z256rr (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
           (v8i16 VR128X:$src), 1)>;
def : Pat<(v32i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
          (VINSERTI32x4Z256rr (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
           (v16i8 VR128X:$src), 1)>;
}

// 64x2 subvector broadcasts need DQI in addition to VLX for the 256-bit
// forms.  VEX_W1X: W=1 encoding restricted per the _dq multiclass usage.
let Predicates = [HasVLX, HasDQI] in {
defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
                           v4i64x_info, v2i64x_info>, VEX_W1X,
                           EVEX_V256, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
                           v4f64x_info, v2f64x_info>, VEX_W1X,
                           EVEX_V256, EVEX_CD8<64, CD8VT2>;

// Patterns for selects of bitcasted operations.
// Masked 256-bit 64x2 broadcasts, matched through a bitcast from the f32/i32
// typed X86SubVBroadcast node; VK4 mask matches the 4 x 64-bit select type.
def : Pat<(vselect_mask VK4WM:$mask,
                        (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
                        (v4f64 immAllZerosV)),
          (VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK4WM:$mask,
                        (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
                        VR256X:$src0),
          (VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK4WM:$mask,
                        (bc_v4i64 (v8i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
                        (v4i64 immAllZerosV)),
          (VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK4WM:$mask,
                        (bc_v4i64 (v8i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
                        VR256X:$src0),
          (VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
}

// 512-bit 64x2 and 32x8 subvector broadcasts are DQI-only.
let Predicates = [HasDQI] in {
defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
                       v8i64_info, v2i64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
                       v16i32_info, v8i32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT8>;
defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
                       v8f64_info, v2f64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
                       v16f32_info, v8f32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT8>;

// Patterns for selects of bitcasted operations.
// Masked 512-bit DQI subvector broadcasts, matched through a bitcast between
// the 32-bit and 64-bit element views of the broadcast result.
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
                        (v16f32 immAllZerosV)),
          (VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
                        VR512:$src0),
          (VBROADCASTF32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))),
                        (v16i32 immAllZerosV)),
          (VBROADCASTI32X8rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))),
                        VR512:$src0),
          (VBROADCASTI32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;

def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
                        (v8f64 immAllZerosV)),
          (VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
                        VR512:$src0),
          (VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
                        (v8i64 immAllZerosV)),
          (VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
                        VR512:$src0),
          (VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}

// DQI-gated 32x2 element broadcasts: 512-bit and 256-bit destinations.
// The source info class is the 64-bit-element view (a 32x2 pair is one
// 64-bit lane of the 128-bit source).
multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
                      AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> {
  let Predicates = [HasDQI] in
    defm Z :    avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256,
                                          WriteShuffle256Ld, _Dst.info512,
                                          _Src.info512, _Src.info128, 0, null_frag, null_frag>,
                                          EVEX_V512;
  let Predicates = [HasDQI, HasVLX] in
    defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256,
                                          WriteShuffle256Ld, _Dst.info256,
                                          _Src.info256, _Src.info128, 0, null_frag, null_frag>,
                                          EVEX_V256;
}

// Integer variant additionally provides a 128-bit destination (Z128),
// scheduled on the XMM shuffle classes.
multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
                      AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> :
  avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> {

  let Predicates = [HasDQI, HasVLX] in
    defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle,
                                          WriteShuffleXLd, _Dst.info128,
                                          _Src.info128, _Src.info128, 0, null_frag, null_frag>,
                                          EVEX_V128;
}

defm VBROADCASTI32X2  : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
                                          avx512vl_i32_info, avx512vl_i64_info>;
defm VBROADCASTF32X2  : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
                                          avx512vl_f32_info, avx512vl_f64_info>;

//===----------------------------------------------------------------------===//
// AVX-512 BROADCAST MASK TO VECTOR REGISTER
//---
// VPBROADCASTM*: replicate a mask register into every element of a vector.
multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr,
                                  X86VectorVTInfo _, RegisterClass KRC> {
  def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.RC:$dst), (ins KRC:$src),
                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                      [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))]>,
                      EVEX, Sched<[WriteShuffle]>;
}

// CDI provides the 512-bit form; CDI+VLX the 256/128-bit forms.
multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
                                 AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> {
  let Predicates = [HasCDI] in
  defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>, EVEX_V512;
  let Predicates = [HasCDI, HasVLX] in {
  defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>, EVEX_V256;
  defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>, EVEX_V128;
  }
}

defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
                                             avx512vl_i32_info, VK16>;
defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
                                             avx512vl_i64_info, VK8>, VEX_W;

//===----------------------------------------------------------------------===//
// -- VPERMI2 - 3 source operands form --
// The index operand ($src1) is tied to the destination and is overwritten.
// IdxVT may differ from the data type _ (FP data with integer indices).
multiclass avx512_perm_i<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched,
                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
    hasSideEffects = 0 in {
  defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, _.RC:$src3)), 1>,
          EVEX_4V, AVX5128IBase, Sched<[sched]>;

  let mayLoad = 1 in
  defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
            (ins _.RC:$src2, _.MemOp:$src3),
            OpcodeStr, "$src3, $src2", "$src2, $src3",
            (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1,
                   (_.VT (_.LdFrag addr:$src3)))), 1>,
            EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Embedded-broadcast memory form ("{1toN}" suffix on the memory operand).
multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched,
                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
      hasSideEffects = 0, mayLoad = 1 in
  defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
              (ins _.RC:$src2, _.ScalarMemOp:$src3),
              OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
              !strconcat("$src2, ${src3}", _.BroadcastStr ),
              (_.VT (X86VPermt2 _.RC:$src2,
                     IdxVT.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
              AVX5128IBase, EVEX_4V, EVEX_B,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// 512-bit always available; 256/128-bit gated on VLX.  Broadcast forms
// included (element sizes >= 32 bits).
multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr,
                               X86FoldableSchedWrite sched,
                               AVX512VLVectorVTInfo VTInfo,
                               AVX512VLVectorVTInfo ShuffleMask> {
  defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
                           ShuffleMask.info512>,
            avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info512,
                             ShuffleMask.info512>, EVEX_V512;
  let Predicates = [HasVLX] in {
  defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
                               ShuffleMask.info128>,
                 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info128,
                                  ShuffleMask.info128>, EVEX_V128;
  defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
                               ShuffleMask.info256>,
                 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info256,
                                  ShuffleMask.info256>, EVEX_V256;
  }
}

// Byte/word variants: no broadcast forms; gated on BWI or VBMI via Prd.
multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched,
                                  AVX512VLVectorVTInfo VTInfo,
                                  AVX512VLVectorVTInfo Idx,
                                  Predicate Prd> {
  let Predicates = [Prd] in
  defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
                           Idx.info512>, EVEX_V512;
  let Predicates = [Prd, HasVLX] in {
  defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
                               Idx.info128>, EVEX_V128;
  defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
                               Idx.info256>, EVEX_V256;
  }
}

defm VPERMI2D  : avx512_perm_i_sizes<0x76, "vpermi2d", WriteVarShuffle256,
                  avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMI2Q  : avx512_perm_i_sizes<0x76, "vpermi2q", WriteVarShuffle256,
                  avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMI2W  : avx512_perm_i_sizes_bw<0x75, "vpermi2w", WriteVarShuffle256,
                  avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
                  VEX_W, EVEX_CD8<16, CD8VF>;
defm VPERMI2B  : avx512_perm_i_sizes_bw<0x75, "vpermi2b", WriteVarShuffle256,
                  avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
                  EVEX_CD8<8, CD8VF>;
defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", WriteFVarShuffle256,
                  avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", WriteFVarShuffle256,
                  avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;

// Extra patterns to deal with extra bitcasts due to passthru and index being
// different types on the fp versions.
multiclass avx512_perm_i_lowering<string InstrStr, X86VectorVTInfo _,
                                  X86VectorVTInfo IdxVT,
                                  X86VectorVTInfo CastVT> {
  // Merge-masked register form: passthru is the (bitcast) index operand.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                                (X86VPermt2 (_.VT _.RC:$src2),
                                            (IdxVT.VT (bitconvert
                                                       (CastVT.VT _.RC:$src1))),
                                            _.RC:$src3),
                                (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
            (!cast<Instruction>(InstrStr#"rrk") _.RC:$src1, _.KRCWM:$mask,
                                                _.RC:$src2, _.RC:$src3)>;
  // Merge-masked memory form.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                                (X86VPermt2 _.RC:$src2,
                                            (IdxVT.VT (bitconvert
                                                       (CastVT.VT _.RC:$src1))),
                                            (_.LdFrag addr:$src3)),
                                (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
            (!cast<Instruction>(InstrStr#"rmk") _.RC:$src1, _.KRCWM:$mask,
                                                _.RC:$src2, addr:$src3)>;
  // Merge-masked broadcast-memory form.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                                (X86VPermt2 _.RC:$src2,
                                            (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))),
                                            (_.BroadcastLdFrag addr:$src3)),
                                (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
            (!cast<Instruction>(InstrStr#"rmbk") _.RC:$src1, _.KRCWM:$mask,
                                                 _.RC:$src2, addr:$src3)>;
}

// TODO: Should we add more casts? The vXi64 case is common due to ABI.
// Instantiate the bitcast-passthru patterns for the FP VPERMI2PS family,
// with the common vXi64 cast type (see TODO above).
defm : avx512_perm_i_lowering<"VPERMI2PS", v16f32_info, v16i32_info, v8i64_info>;
defm : avx512_perm_i_lowering<"VPERMI2PS256", v8f32x_info, v8i32x_info, v4i64x_info>;
defm : avx512_perm_i_lowering<"VPERMI2PS128", v4f32x_info, v4i32x_info, v2i64x_info>;

// VPERMT2
// Like VPERMI2 but the tied/overwritten operand ($src1) is a data table,
// not the index vector.
multiclass avx512_perm_t<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched,
                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
  defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins IdxVT.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)), 1>,
          EVEX_4V, AVX5128IBase, Sched<[sched]>;

  defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
            (ins IdxVT.RC:$src2, _.MemOp:$src3),
            OpcodeStr, "$src3, $src2", "$src2, $src3",
            (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
                   (_.LdFrag addr:$src3))), 1>,
            EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Embedded-broadcast memory form of VPERMT2.
multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched,
                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
  defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
              (ins IdxVT.RC:$src2, _.ScalarMemOp:$src3),
              OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
              !strconcat("$src2, ${src3}", _.BroadcastStr ),
              (_.VT (X86VPermt2 _.RC:$src1,
                     IdxVT.RC:$src2,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
              AVX5128IBase, EVEX_4V, EVEX_B,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// 512-bit always; 256/128-bit under VLX; includes broadcast forms.
multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr,
                               X86FoldableSchedWrite sched,
                               AVX512VLVectorVTInfo VTInfo,
                               AVX512VLVectorVTInfo ShuffleMask> {
  defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
                           ShuffleMask.info512>,
            avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info512,
                             ShuffleMask.info512>, EVEX_V512;
  let Predicates = [HasVLX] in {
  defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
                               ShuffleMask.info128>,
                 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info128,
                                  ShuffleMask.info128>, EVEX_V128;
  defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
                               ShuffleMask.info256>,
                 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info256,
                                  ShuffleMask.info256>, EVEX_V256;
  }
}

// Byte/word variants: no broadcast forms; gated on Prd (BWI or VBMI).
multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched,
                                  AVX512VLVectorVTInfo VTInfo,
                                  AVX512VLVectorVTInfo Idx, Predicate Prd> {
  let Predicates = [Prd] in
  defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
                           Idx.info512>, EVEX_V512;
  let Predicates = [Prd, HasVLX] in {
  defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
                               Idx.info128>, EVEX_V128;
  defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
                               Idx.info256>, EVEX_V256;
  }
}

defm VPERMT2D  : avx512_perm_t_sizes<0x7E, "vpermt2d", WriteVarShuffle256,
                  avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMT2Q  : avx512_perm_t_sizes<0x7E, "vpermt2q", WriteVarShuffle256,
                  avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMT2W  : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", WriteVarShuffle256,
                  avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
                  VEX_W, EVEX_CD8<16, CD8VF>;
defm VPERMT2B  : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", WriteVarShuffle256,
                  avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
                  EVEX_CD8<8, CD8VF>;
defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", WriteFVarShuffle256,
                  avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", WriteFVarShuffle256,
                  avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;

//===----------------------------------------------------------------------===//
// AVX-512 - BLEND using mask
//

// Assembly-only blend forms (no ISel patterns: [] pattern lists); plain,
// merge-masked (rrk/rmk) and zero-masked (rrkz/rmkz) register/memory forms.
multiclass WriteFVarBlendask<bits<8> opc, string OpcodeStr,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
             (ins _.RC:$src1, _.RC:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), []>,
             EVEX_4V, Sched<[sched]>;
  def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
             (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
             []>, EVEX_4V, EVEX_K, Sched<[sched]>;
  def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
             (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
             []>, EVEX_4V, EVEX_KZ, Sched<[sched]>, NotMemoryFoldable;
  let mayLoad = 1 in {
  def rm  : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
             (ins _.RC:$src1, _.MemOp:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
             []>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
             (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
             []>, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
             (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
             []>, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
  }
  }
}

// Embedded-broadcast memory forms of the blend (d/q element sizes only).
multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in {
  def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
              (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
              !strconcat(OpcodeStr,
              "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
              "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
              EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
              Sched<[sched.Folded, sched.ReadAfterFold]>;

  def rmbkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
              (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
              !strconcat(OpcodeStr,
              "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}} {z}|",
              "$dst {${mask}} {z}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
              EVEX_4V, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
              Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;

  def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
              (ins _.RC:$src1, _.ScalarMemOp:$src2),
              !strconcat(OpcodeStr,
              "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
              "$dst, $src1, ${src2}", _.BroadcastStr, "}"), []>,
              EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Dword/qword blends: all vector lengths, including broadcast forms.
multiclass blendmask_dq<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
                        AVX512VLVectorVTInfo VTInfo> {
  defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
           WriteFVarBlendask_rmb<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
           EVEX_V512;

  let Predicates = [HasVLX] in {
  defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
              WriteFVarBlendask_rmb<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
              EVEX_V256;
  defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
              WriteFVarBlendask_rmb<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
              EVEX_V128;
  }
}

// Byte/word blends: BWI-gated, no broadcast forms.
multiclass blendmask_bw<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
                        AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasBWI] in
  defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
           EVEX_V512;

  let Predicates = [HasBWI, HasVLX] in {
  defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
              EVEX_V256;
  defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
              EVEX_V128;
  }
}

defm VBLENDMPS : blendmask_dq<0x65, "vblendmps", SchedWriteFVarBlend,
                              avx512vl_f32_info>;
defm VBLENDMPD : blendmask_dq<0x65, "vblendmpd", SchedWriteFVarBlend,
                              avx512vl_f64_info>, VEX_W;
defm VPBLENDMD : blendmask_dq<0x64, "vpblendmd", SchedWriteVarBlend,
                              avx512vl_i32_info>;
defm VPBLENDMQ : blendmask_dq<0x64, "vpblendmq", SchedWriteVarBlend,
                              avx512vl_i64_info>, VEX_W;
defm VPBLENDMB : blendmask_bw<0x66, "vpblendmb", SchedWriteVarBlend,
                              avx512vl_i8_info>;
defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", SchedWriteVarBlend,
                              avx512vl_i16_info>, VEX_W;

//===----------------------------------------------------------------------===//
// Compare Instructions
//===----------------------------------------------------------------------===//

// avx512_cmp_scalar - AVX512 CMPSS and CMPSD

// Intrinsic forms (rr_Int/rm_Int/rrb_Int) write a mask register; the
// isCodeGenOnly rr/rm forms operate on scalar FRC registers for plain
// scalar setcc lowering.  *_su fragments restrict masked matching to
// single-use compares.
multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
                             PatFrag OpNode_su, PatFrag OpNodeSAE_su,
                             X86FoldableSchedWrite sched> {
  defm rr_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                      (outs _.KRC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                      "vcmp"#_.Suffix,
                      "$cc, $src2, $src1", "$src1, $src2, $cc",
                      (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
                      (OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                 timm:$cc)>, EVEX_4V, VEX_LIG, Sched<[sched]>, SIMD_EXC;
  let mayLoad = 1 in
  defm rm_Int : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
                    (outs _.KRC:$dst),
                    (ins _.RC:$src1, _.IntScalarMemOp:$src2, u8imm:$cc),
                    "vcmp"#_.Suffix,
                    "$cc, $src2, $src1", "$src1, $src2, $cc",
                    (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
                        timm:$cc),
                    (OpNode_su (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
                        timm:$cc)>, EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;

  // SAE form suppresses exceptions; still reads MXCSR.
  let Uses = [MXCSR] in
  defm  rrb_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                     (outs _.KRC:$dst),
                     (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                     "vcmp"#_.Suffix,
                     "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc",
                     (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                timm:$cc),
                     (OpNodeSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                   timm:$cc)>,
                     EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;

  let isCodeGenOnly = 1 in {
    let isCommutable = 1 in
    def rr : AVX512Ii8<0xC2, MRMSrcReg,
                (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, u8imm:$cc),
                !strconcat("vcmp", _.Suffix,
                           "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
                [(set _.KRC:$dst, (OpNode _.FRC:$src1,
                                          _.FRC:$src2,
                                          timm:$cc))]>,
                EVEX_4V, VEX_LIG, Sched<[sched]>, SIMD_EXC;
    def rm : AVX512Ii8<0xC2, MRMSrcMem,
              (outs _.KRC:$dst),
              (ins _.FRC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
              !strconcat("vcmp", _.Suffix,
                         "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
              [(set _.KRC:$dst, (OpNode _.FRC:$src1,
                                        (_.ScalarLdFrag addr:$src2),
                                        timm:$cc))]>,
              EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
              Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  }
}

// Single-use guards: only fold the compare into a masked instruction when
// the compare node has no other users.
def X86cmpms_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                          (X86cmpms node:$src1, node:$src2, node:$cc), [{
  return N->hasOneUse();
}]>;
def X86cmpmsSAE_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                             (X86cmpmsSAE node:$src1, node:$src2, node:$cc), [{
  return N->hasOneUse();
}]>;

let Predicates = [HasAVX512] in {
  let ExeDomain = SSEPackedSingle in
  defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsSAE,
                                   X86cmpms_su, X86cmpmsSAE_su,
                                   SchedWriteFCmp.Scl>, AVX512XSIi8Base;
  let ExeDomain = SSEPackedDouble in
  defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsSAE,
                                   X86cmpms_su, X86cmpmsSAE_su,
                                   SchedWriteFCmp.Scl>, AVX512XDIi8Base, VEX_W;
}

// Assembly-only integer compare forms (patterns are []; selection happens
// via the _cc multiclasses and patterns elsewhere).
multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr,
                              X86FoldableSchedWrite sched,
                              X86VectorVTInfo _, bit IsCommutable> {
  let isCommutable = IsCommutable, hasSideEffects = 0 in
  def rr : AVX512BI<opc, MRMSrcReg,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             []>, EVEX_4V, Sched<[sched]>;
  let mayLoad = 1, hasSideEffects = 0 in
  def rm : AVX512BI<opc, MRMSrcMem,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             []>, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  let isCommutable = IsCommutable, hasSideEffects = 0 in
  def rrk : AVX512BI<opc, MRMSrcReg,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
                         "$dst {${mask}}, $src1, $src2}"),
              []>, EVEX_4V, EVEX_K, Sched<[sched]>;
  let mayLoad = 1, hasSideEffects = 0 in
  def rmk : AVX512BI<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
                         "$dst {${mask}}, $src1, $src2}"),
              []>, EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// Adds embedded-broadcast memory forms (d/q element types only).
multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                  bit IsCommutable> :
           avx512_icmp_packed<opc, OpcodeStr, sched, _, IsCommutable> {
  let mayLoad = 1, hasSideEffects = 0 in {
  def rmb : AVX512BI<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
              !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
                                    "|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
              []>, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmbk : AVX512BI<opc, MRMSrcMem,
               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
                                       _.ScalarMemOp:$src2),
               !strconcat(OpcodeStr,
                          "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
                          "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
               []>, EVEX_4V, EVEX_K, EVEX_B,
               Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Vector-length expansion: 512-bit under prd, 256/128-bit under prd+VLX.
multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr,
                                 X86SchedWriteWidths sched,
                                 AVX512VLVectorVTInfo VTInfo, Predicate prd,
                                 bit IsCommutable = 0> {
  let Predicates = [prd] in
  defm Z : avx512_icmp_packed<opc, OpcodeStr, sched.ZMM,
                              VTInfo.info512, IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_packed<opc, OpcodeStr, sched.YMM,
                                   VTInfo.info256, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_icmp_packed<opc, OpcodeStr, sched.XMM,
                                   VTInfo.info128, IsCommutable>, EVEX_V128;
  }
}

multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
                                     X86SchedWriteWidths sched,
                                     AVX512VLVectorVTInfo VTInfo,
                                     Predicate prd, bit IsCommutable = 0> {
  let Predicates = [prd] in
  defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.ZMM,
                                  VTInfo.info512, IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.YMM,
                                       VTInfo.info256, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.XMM,
                                       VTInfo.info128, IsCommutable>, EVEX_V128;
  }
}

// This fragment treats X86cmpm as commutable to help match loads in both
// operands for PCMPEQ.
def X86setcc_commute : SDNode<"ISD::SETCC", SDTSetCC, [SDNPCommutative]>;
def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2),
                         (setcc node:$src1, node:$src2, SETGT)>;

// AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
// increase the pattern complexity the way an immediate would.
let AddedComplexity = 2 in {
// FIXME: Is there a better scheduler class for VPCMP?
defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb",
                      SchedWriteVecALU, avx512vl_i8_info, HasBWI, 1>,
                EVEX_CD8<8, CD8VF>, VEX_WIG;

defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw",
                      SchedWriteVecALU, avx512vl_i16_info, HasBWI, 1>,
                EVEX_CD8<16, CD8VF>, VEX_WIG;

defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd",
                      SchedWriteVecALU, avx512vl_i32_info, HasAVX512, 1>,
                EVEX_CD8<32, CD8VF>;

defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq",
                      SchedWriteVecALU, avx512vl_i64_info, HasAVX512, 1>,
                T8PD, VEX_W, EVEX_CD8<64, CD8VF>;

defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb",
                      SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
                EVEX_CD8<8, CD8VF>, VEX_WIG;

defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw",
                      SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
                EVEX_CD8<16, CD8VF>, VEX_WIG;

defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd",
                      SchedWriteVecALU, avx512vl_i32_info, HasAVX512>,
                EVEX_CD8<32, CD8VF>;

defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq",
                      SchedWriteVecALU, avx512vl_i64_info, HasAVX512>,
                T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
}

// VPCMP with explicit condition-code immediate.  Frag captures a setcc
// whose condcode is turned into the immediate; CommFrag matches the
// operand-swapped form so a load on the LHS can still be folded (its
// OperandTransform swaps the condcode accordingly).
multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
                          PatFrag Frag_su, PatFrag CommFrag, PatFrag CommFrag_su,
                          X86FoldableSchedWrite sched,
                          X86VectorVTInfo _, string Name> {
  let isCommutable = 1 in
  def rri : AVX512AIi8<opc, MRMSrcReg,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
             !strconcat("vpcmp", Suffix,
                        "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
             [(set _.KRC:$dst, (_.KVT (Frag:$cc (_.VT _.RC:$src1),
                                                (_.VT _.RC:$src2),
                                                cond)))]>,
             EVEX_4V, Sched<[sched]>;
  def rmi : AVX512AIi8<opc, MRMSrcMem,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
             !strconcat("vpcmp", Suffix,
                        "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
             [(set _.KRC:$dst, (_.KVT
                                (Frag:$cc
                                 (_.VT _.RC:$src1),
                                 (_.VT (_.LdFrag addr:$src2)),
                                 cond)))]>,
             EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  let isCommutable = 1 in
  def rrik : AVX512AIi8<opc, MRMSrcReg,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
                                      u8imm:$cc),
              !strconcat("vpcmp", Suffix,
                         "\t{$cc, $src2, $src1, $dst {${mask}}|",
                         "$dst {${mask}}, $src1, $src2, $cc}"),
              [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                     (_.KVT (Frag_su:$cc (_.VT _.RC:$src1),
                                                         (_.VT _.RC:$src2),
                                                         cond))))]>,
              EVEX_4V, EVEX_K, Sched<[sched]>;
  def rmik : AVX512AIi8<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
                                      u8imm:$cc),
              !strconcat("vpcmp", Suffix,
                         "\t{$cc, $src2, $src1, $dst {${mask}}|",
                         "$dst {${mask}}, $src1, $src2, $cc}"),
              [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                     (_.KVT
                                      (Frag_su:$cc
                                       (_.VT _.RC:$src1),
                                       (_.VT (_.LdFrag addr:$src2)),
                                       cond))))]>,
              EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;

  // Load on the LHS: match the commuted fragment and swap the condcode.
  def : Pat<(_.KVT (CommFrag:$cc (_.LdFrag addr:$src2),
                                 (_.VT _.RC:$src1), cond)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmi")
             _.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>;

  def : Pat<(and _.KRCWM:$mask,
                 (_.KVT (CommFrag_su:$cc (_.LdFrag addr:$src2),
                                         (_.VT _.RC:$src1), cond))),
            (!cast<Instruction>(Name#_.ZSuffix#"rmik")
             _.KRCWM:$mask, _.RC:$src1, addr:$src2,
             (CommFrag.OperandTransform $cc))>;
}

// Adds embedded-broadcast forms and the matching commuted-broadcast patterns.
multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
                              PatFrag Frag_su, PatFrag CommFrag,
                              PatFrag CommFrag_su, X86FoldableSchedWrite sched,
                              X86VectorVTInfo _, string Name> :
           avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
                          sched, _, Name> {
  def rmib : AVX512AIi8<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
                                      u8imm:$cc),
              !strconcat("vpcmp", Suffix,
                         "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
                         "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
              [(set _.KRC:$dst, (_.KVT (Frag:$cc
                                        (_.VT _.RC:$src1),
                                        (_.BroadcastLdFrag addr:$src2),
                                        cond)))]>,
              EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmibk : AVX512AIi8<opc, MRMSrcMem,
               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
                                       _.ScalarMemOp:$src2, u8imm:$cc),
               !strconcat("vpcmp", Suffix,
                          "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
                          "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
               [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                      (_.KVT (Frag_su:$cc
                                              (_.VT _.RC:$src1),
                                              (_.BroadcastLdFrag addr:$src2),
                                              cond))))]>,
               EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;

  def : Pat<(_.KVT (CommFrag:$cc (_.BroadcastLdFrag addr:$src2),
                    (_.VT _.RC:$src1), cond)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmib")
             _.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>;

  def : Pat<(and _.KRCWM:$mask,
                 (_.KVT (CommFrag_su:$cc (_.BroadcastLdFrag addr:$src2),
                                         (_.VT _.RC:$src1), cond))),
            (!cast<Instruction>(Name#_.ZSuffix#"rmibk")
             _.KRCWM:$mask, _.RC:$src1, addr:$src2,
             (CommFrag_su.OperandTransform $cc))>;
}

// Vector-length expansion for the condition-code compares.
multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, PatFrag Frag,
                             PatFrag Frag_su, PatFrag CommFrag,
                             PatFrag CommFrag_su, X86SchedWriteWidths sched,
                             AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
                          sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
                               sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
                               sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
  }
}

multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, PatFrag Frag,
                                 PatFrag Frag_su, PatFrag CommFrag,
                                 PatFrag CommFrag_su, X86SchedWriteWidths sched,
                                 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
                              sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
                                   sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
                                   sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
  }
}

// Convert a setcc condition code into the VPCMP immediate encoding.
def X86pcmpm_imm : SDNodeXForm<setcc, [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  uint8_t SSECC = X86::getVPCMPImmForCond(CC);
  return getI8Imm(SSECC, SDLoc(N));
}]>;

// Swapped operand version of the above.
// Same as X86pcmpm_imm, but emits the swapped VPCMP immediate
// (getSwappedVPCMPImm). Used when a pattern's operands have been commuted to
// fold a load into the second source.
def X86pcmpm_imm_commute : SDNodeXForm<setcc, [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  uint8_t SSECC = X86::getVPCMPImmForCond(CC);
  SSECC = X86::getSwappedVPCMPImm(SSECC);
  return getI8Imm(SSECC, SDLoc(N));
}]>;

// setcc with a signed condition code -> VPCMP immediate.
def X86pcmpm : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                       (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return !ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm>;

// Single-use (hasOneUse) variant, used by the masked patterns so the compare
// is not duplicated when its result has other users.
def X86pcmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                          (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return N->hasOneUse() && !ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm>;

// Same as above, but commutes immediate. Use for load folding.
def X86pcmpm_commute : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                               (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return !ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm_commute>;

def X86pcmpm_commute_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                                  (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return N->hasOneUse() && !ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm_commute>;

// Unsigned-condition counterparts, selecting VPCMPU*.
def X86pcmpum : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                        (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm>;

def X86pcmpum_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                           (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return N->hasOneUse() && ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm>;

// Same as above, but commutes immediate. Use for load folding.
def X86pcmpum_commute : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                                (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm_commute>;

def X86pcmpum_commute_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                                   (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return N->hasOneUse() && ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm_commute>;

// VPCMP{B,W} / VPCMPU{B,W}: no embedded-broadcast form for byte/word elements.
// D/Q share the same opcodes per signedness and are distinguished by VEX_W.
// FIXME: Is there a better scheduler class for VPCMP/VPCMPU?
defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86pcmpm, X86pcmpm_su,
                                X86pcmpm_commute, X86pcmpm_commute_su,
                                SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
                                EVEX_CD8<8, CD8VF>;
defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86pcmpum, X86pcmpum_su,
                                 X86pcmpum_commute, X86pcmpum_commute_su,
                                 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
                                 EVEX_CD8<8, CD8VF>;

defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86pcmpm, X86pcmpm_su,
                                X86pcmpm_commute, X86pcmpm_commute_su,
                                SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
                                VEX_W, EVEX_CD8<16, CD8VF>;
defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86pcmpum, X86pcmpum_su,
                                 X86pcmpum_commute, X86pcmpum_commute_su,
                                 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
                                 VEX_W, EVEX_CD8<16, CD8VF>;

defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86pcmpm, X86pcmpm_su,
                                    X86pcmpm_commute, X86pcmpm_commute_su,
                                    SchedWriteVecALU, avx512vl_i32_info,
                                    HasAVX512>, EVEX_CD8<32, CD8VF>;
defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86pcmpum, X86pcmpum_su,
                                     X86pcmpum_commute, X86pcmpum_commute_su,
                                     SchedWriteVecALU, avx512vl_i32_info,
                                     HasAVX512>, EVEX_CD8<32, CD8VF>;

defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86pcmpm, X86pcmpm_su,
                                    X86pcmpm_commute, X86pcmpm_commute_su,
                                    SchedWriteVecALU, avx512vl_i64_info,
                                    HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86pcmpum, X86pcmpum_su,
                                     X86pcmpum_commute, X86pcmpum_commute_su,
                                     SchedWriteVecALU, avx512vl_i64_info,
                                     HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;

// Single-use variants of the FP compare nodes, for the masked patterns.
def X86cmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                         (X86cmpm node:$src1, node:$src2, node:$cc), [{
  return N->hasOneUse();
}]>;
def X86cmpmSAE_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                            (X86cmpmSAE node:$src1, node:$src2, node:$cc), [{
  return N->hasOneUse();
}]>;

// Swap a VCMP immediate (only the low 5 bits are a valid predicate).
def X86cmpm_imm_commute : SDNodeXForm<timm, [{
  uint8_t Imm = X86::getSwappedVCMPImm(N->getZExtValue() & 0x1f);
  return getI8Imm(Imm, SDLoc(N));
}]>;

// VCMPPS/PD reg/mem/broadcast forms plus commuted-load selection patterns.
multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
                              string Name> {
let Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm  rri  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                   (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,u8imm:$cc),
                   "vcmp"#_.Suffix,
                   "$cc, $src2, $src1", "$src1, $src2, $cc",
                   (X86any_cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
                   (X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
                   1>, Sched<[sched]>;

  defm  rmi  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
                (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
                "vcmp"#_.Suffix,
                "$cc, $src2, $src1", "$src1, $src2, $cc",
                (X86any_cmpm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
                             timm:$cc),
                (X86cmpm_su (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
                            timm:$cc)>,
                Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm  rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
                (outs _.KRC:$dst),
                (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
                "vcmp"#_.Suffix,
                "$cc, ${src2}"#_.BroadcastStr#", $src1",
                "$src1, ${src2}"#_.BroadcastStr#", $cc",
                (X86any_cmpm (_.VT _.RC:$src1),
                             (_.VT (_.BroadcastLdFrag addr:$src2)),
                             timm:$cc),
                (X86cmpm_su (_.VT _.RC:$src1),
                            (_.VT (_.BroadcastLdFrag addr:$src2)),
                            timm:$cc)>,
                EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }

  // Patterns for selecting with loads in other operand. The immediate is
  // swapped (X86cmpm_imm_commute) to compensate for the commuted operands.
  def : Pat<(X86any_cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1),
                         timm:$cc),
            (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
                                                      (X86cmpm_imm_commute timm:$cc))>;

  def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.LdFrag addr:$src2),
                                            (_.VT _.RC:$src1),
                                            timm:$cc)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
                                                       _.RC:$src1, addr:$src2,
                                                       (X86cmpm_imm_commute timm:$cc))>;

  def : Pat<(X86any_cmpm (_.BroadcastLdFrag addr:$src2),
                         (_.VT _.RC:$src1), timm:$cc),
            (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
                                                       (X86cmpm_imm_commute timm:$cc))>;

  def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.BroadcastLdFrag addr:$src2),
                                            (_.VT _.RC:$src1),
                                            timm:$cc)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
                                                        _.RC:$src1, addr:$src2,
                                                        (X86cmpm_imm_commute timm:$cc))>;
}

// Register-only {sae} (suppress-all-exceptions) form, 512-bit only.
multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  // comparison code form (VCMP[EQ/LT/LE/...]
  let Uses = [MXCSR] in
  defm  rrib  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                     (outs _.KRC:$dst),(ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                     "vcmp"#_.Suffix,
                     "$cc, {sae}, $src2, $src1",
                     "$src1, $src2, {sae}, $cc",
                     (X86cmpmSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
                     (X86cmpmSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                    timm:$cc)>,
                     EVEX_B, Sched<[sched]>;
}

multiclass avx512_vcmp<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z    : avx512_vcmp_common<sched.ZMM, _.info512, NAME>,
                avx512_vcmp_sae<sched.ZMM, _.info512>, EVEX_V512;

  }
  let Predicates = [HasAVX512,HasVLX] in {
    defm Z128 : avx512_vcmp_common<sched.XMM, _.info128, NAME>, EVEX_V128;
    defm Z256 : avx512_vcmp_common<sched.YMM, _.info256, NAME>, EVEX_V256;
  }
}

defm VCMPPD : avx512_vcmp<SchedWriteFCmp, avx512vl_f64_info>,
                          AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VCMPPS : avx512_vcmp<SchedWriteFCmp, avx512vl_f32_info>,
                          AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;

// Patterns to select fp compares with load as first operand.
let Predicates = [HasAVX512] in {
  // Scalar compares with the load in the first operand: commute and swap the
  // VCMP immediate so the memory operand lands in the foldable position.
  def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1,
                            timm:$cc)),
            (VCMPSDZrm FR64X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;

  def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1,
                            timm:$cc)),
            (VCMPSSZrm FR32X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
}

// ----------------------------------------------------------------
// FPClass

// Single-use (hasOneUse) wrappers, used by the masked forms below.
def X86Vfpclasss_su : PatFrag<(ops node:$src1, node:$src2),
                              (X86Vfpclasss node:$src1, node:$src2), [{
  return N->hasOneUse();
}]>;

def X86Vfpclass_su : PatFrag<(ops node:$src1, node:$src2),
                             (X86Vfpclass node:$src1, node:$src2), [{
  return N->hasOneUse();
}]>;

//handle fpclass instruction  mask = op(reg_scalar,imm)
//                                   op(mem_scalar,imm)
multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                 Predicate prd> {
  let Predicates = [prd], ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
    def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                    (ins _.RC:$src1, i32u8imm:$src2),
                    OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.KRC:$dst,(X86Vfpclasss (_.VT _.RC:$src1),
                                                   (i32 timm:$src2)))]>,
                    Sched<[sched]>;
    // Zeroing-masked form: mask is ANDed into the result.
    def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                     (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
                     OpcodeStr#_.Suffix#
                     "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                     [(set _.KRC:$dst,(and _.KRCWM:$mask,
                                           (X86Vfpclasss_su (_.VT _.RC:$src1),
                                                            (i32 timm:$src2))))]>,
                     EVEX_K, Sched<[sched]>;
    def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.IntScalarMemOp:$src1, i32u8imm:$src2),
                    OpcodeStr#_.Suffix#
                    "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.KRC:$dst,
                          (X86Vfpclasss (_.ScalarIntMemFrags addr:$src1),
                                        (i32 timm:$src2)))]>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
    def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                     (ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2),
                     OpcodeStr#_.Suffix#
                     "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                     [(set _.KRC:$dst,(and _.KRCWM:$mask,
                         (X86Vfpclasss_su (_.ScalarIntMemFrags addr:$src1),
                                          (i32 timm:$src2))))]>,
                     EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

//handle fpclass instruction mask = fpclass(reg_vec, reg_vec, imm)
//                                  fpclass(reg_vec, mem_vec, imm)
//                                  fpclass(reg_vec, broadcast(eltVt), imm)
multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                 string mem>{
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
  def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                  (ins _.RC:$src1, i32u8imm:$src2),
                  OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  [(set _.KRC:$dst,(X86Vfpclass (_.VT _.RC:$src1),
                                                (i32 timm:$src2)))]>,
                  Sched<[sched]>;
  def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                   (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
                   OpcodeStr#_.Suffix#
                   "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                   [(set _.KRC:$dst,(and _.KRCWM:$mask,
                                         (X86Vfpclass_su (_.VT _.RC:$src1),
                                                         (i32 timm:$src2))))]>,
                   EVEX_K, Sched<[sched]>;
  // Memory form carries the {x,y,z} suffix in the mnemonic to disambiguate
  // operand width in AT&T syntax.
  def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                  (ins _.MemOp:$src1, i32u8imm:$src2),
                  OpcodeStr#_.Suffix#"{"#mem#"}"#
                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  [(set _.KRC:$dst,(X86Vfpclass
                                    (_.VT (_.LdFrag addr:$src1)),
                                    (i32 timm:$src2)))]>,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                   (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
                   OpcodeStr#_.Suffix#"{"#mem#"}"#
                   "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                   [(set _.KRC:$dst, (and _.KRCWM:$mask, (X86Vfpclass_su
                                     (_.VT (_.LdFrag addr:$src1)),
                                     (i32 timm:$src2))))]>,
                   EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                   (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
                   OpcodeStr#_.Suffix#"\t{$src2, ${src1}"#
                   _.BroadcastStr#", $dst|$dst, ${src1}"
                   #_.BroadcastStr#", $src2}",
                   [(set _.KRC:$dst,(X86Vfpclass
                                     (_.VT (_.BroadcastLdFrag addr:$src1)),
                                     (i32 timm:$src2)))]>,
                   EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
                    OpcodeStr#_.Suffix#"\t{$src2, ${src1}"#
                    _.BroadcastStr#", $dst {${mask}}|$dst {${mask}}, ${src1}"#
                    _.BroadcastStr#", $src2}",
                    [(set _.KRC:$dst,(and _.KRCWM:$mask, (X86Vfpclass_su
                                      (_.VT (_.BroadcastLdFrag addr:$src1)),
                                      (i32 timm:$src2))))]>,
                    EVEX_B, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }

  // Allow registers or broadcast with the x, y, z suffix we use to disambiguate
  // the memory form.
  def : InstAlias<OpcodeStr#_.Suffix#mem#
                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  (!cast<Instruction>(NAME#"rr")
                   _.KRC:$dst, _.RC:$src1, i32u8imm:$src2), 0, "att">;
  def : InstAlias<OpcodeStr#_.Suffix#mem#
                  "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                  (!cast<Instruction>(NAME#"rrk")
                   _.KRC:$dst, _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2), 0, "att">;
  def : InstAlias<OpcodeStr#_.Suffix#mem#
                  "\t{$src2, ${src1}"#_.BroadcastStr#", $dst|$dst, ${src1}"#
                  _.BroadcastStr#", $src2}",
                  (!cast<Instruction>(NAME#"rmb")
                   _.KRC:$dst, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
  def : InstAlias<OpcodeStr#_.Suffix#mem#
                  "\t{$src2, ${src1}"#_.BroadcastStr#", $dst {${mask}}|"
                  "$dst {${mask}}, ${src1}"#_.BroadcastStr#", $src2}",
                  (!cast<Instruction>(NAME#"rmbk")
                   _.KRC:$dst, _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
}

multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _,
                                     bits<8> opc, X86SchedWriteWidths sched,
                                     Predicate prd>{
  let Predicates = [prd] in {
    defm Z    : avx512_vector_fpclass<opc, OpcodeStr, sched.ZMM,
                                      _.info512, "z">, EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, sched.XMM,
                                      _.info128, "x">, EVEX_V128;
    defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, sched.YMM,
                                      _.info256, "y">, EVEX_V256;
  }
}

multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
                                 bits<8> opcScalar, X86SchedWriteWidths sched,
                                 Predicate prd> {
  defm PS : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f32_info, opcVec,
                                      sched, prd>,
                                      EVEX_CD8<32, CD8VF>;
  defm PD : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f64_info, opcVec,
                                      sched, prd>,
                                      EVEX_CD8<64, CD8VF> , VEX_W;
  defm SSZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
                                   sched.Scl, f32x_info, prd>, VEX_LIG,
                                   EVEX_CD8<32, CD8VT1>;
  defm SDZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
                                   sched.Scl, f64x_info, prd>, VEX_LIG,
                                   EVEX_CD8<64, CD8VT1>, VEX_W;
}

defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, SchedWriteFCmp,
                                      HasDQI>, AVX512AIi8Base, EVEX;

//-----------------------------------------------------------------
// Mask register copy, including
// - copy between mask registers
// - load/store mask registers
// - copy from GPR to mask register and vice versa
//
multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
                           string OpcodeStr, RegisterClass KRC,
                           ValueType vvt, X86MemOperand x86memop> {
  // Register-to-register move has no pattern; copies are handled by the
  // register allocator (hasSideEffects = 0, isMoveReg = 1).
  let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove] in
  def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
           Sched<[WriteMove]>;
  def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
             [(set KRC:$dst, (vvt (load addr:$src)))]>,
           Sched<[WriteLoad]>;
  def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
             [(store KRC:$src, addr:$dst)]>,
           Sched<[WriteStore]>;
}

multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
                               string OpcodeStr,
                               RegisterClass KRC, RegisterClass GRC> {
  let hasSideEffects = 0 in {
    def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
             Sched<[WriteMove]>;
    def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
             Sched<[WriteMove]>;
  }
}

let Predicates = [HasDQI] in
  defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
               avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
               VEX, PD;

let Predicates = [HasAVX512] in
  defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
               avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
               VEX, PS;

let Predicates = [HasBWI] in {
  defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem>,
               VEX, PD, VEX_W;
  defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
               VEX, XD;
  defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>,
               VEX, PS, VEX_W;
  defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
               VEX, XD, VEX_W;
}

// GR from/to mask register
def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
          (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>;
def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>;

def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
          (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>;
def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit)>;

// KMOVWrk zero-extends, so zext through i16 is free.
def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
          (KMOVWrk VK16:$src)>;
def : Pat<(i64 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
          (SUBREG_TO_REG (i64 0), (KMOVWrk VK16:$src), sub_32bit)>;
def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
          (COPY_TO_REGCLASS VK16:$src, GR32)>;
def : Pat<(i64 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
          (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK16:$src, GR32), sub_32bit)>;

def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
          (KMOVBrk VK8:$src)>, Requires<[HasDQI]>;
def : Pat<(i64 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
          (SUBREG_TO_REG (i64 0), (KMOVBrk VK8:$src), sub_32bit)>, Requires<[HasDQI]>;
def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
          (COPY_TO_REGCLASS VK8:$src, GR32)>;
def : Pat<(i64 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
          (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK8:$src, GR32), sub_32bit)>;

def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
          (COPY_TO_REGCLASS GR32:$src, VK32)>;
def : Pat<(i32 (bitconvert (v32i1 VK32:$src))),
          (COPY_TO_REGCLASS VK32:$src, GR32)>;
def : Pat<(v64i1 (bitconvert (i64 GR64:$src))),
          (COPY_TO_REGCLASS GR64:$src, VK64)>;
def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
          (COPY_TO_REGCLASS VK64:$src, GR64)>;

// Load/store kreg
let Predicates = [HasDQI] in {
  def : Pat<(store VK1:$src, addr:$dst),
            (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK8))>;

  // Sub-byte mask loads go through an 8-bit KMOVB and are re-classed.
  def : Pat<(v1i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>;
  def : Pat<(v2i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>;
  def : Pat<(v4i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>;
}

let Predicates = [HasAVX512] in {
  // Without DQI there is no KMOVB: load the byte into a GPR and copy over.
  def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
            (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
  def : Pat<(v16i1 (bitconvert (loadi16 addr:$src))),
            (KMOVWkm addr:$src)>;
}

def X86kextract : SDNode<"ISD::EXTRACT_VECTOR_ELT",
                         SDTypeProfile<1, 2, [SDTCisVT<0, i8>,
                                              SDTCVecEltisVT<1, i1>,
                                              SDTCisPtrTy<2>]>>;

let Predicates = [HasAVX512] in {
  multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC, ValueType maskVT> {
    def : Pat<(maskVT (scalar_to_vector GR32:$src)),
              (COPY_TO_REGCLASS GR32:$src, maskRC)>;

    def : Pat<(maskVT (scalar_to_vector GR8:$src)),
              (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>;

    def : Pat<(i8 (X86kextract maskRC:$src, (iPTR 0))),
              (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS maskRC:$src, GR32)), sub_8bit)>;

    def : Pat<(i32 (anyext (i8 (X86kextract maskRC:$src, (iPTR 0))))),
              (i32 (COPY_TO_REGCLASS maskRC:$src, GR32))>;
  }

  defm : operation_gpr_mask_copy_lowering<VK1,  v1i1>;
  defm : operation_gpr_mask_copy_lowering<VK2,  v2i1>;
  defm : operation_gpr_mask_copy_lowering<VK4,  v4i1>;
  defm : operation_gpr_mask_copy_lowering<VK8,  v8i1>;
  defm : operation_gpr_mask_copy_lowering<VK16, v16i1>;
  defm : operation_gpr_mask_copy_lowering<VK32, v32i1>;
  defm : operation_gpr_mask_copy_lowering<VK64, v64i1>;

  // Insert a single GR8 bit into a zeroed v16i1: mask to bit 0 then KMOVW.
  def : Pat<(insert_subvector (v16i1 immAllZerosV),
                              (v1i1 (scalar_to_vector GR8:$src)), (iPTR 0)),
            (COPY_TO_REGCLASS
             (KMOVWkr (AND32ri8
                       (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
                       (i32 1))), VK16)>;
}

// Mask unary operation
// - KNOT
multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
                            RegisterClass KRC, SDPatternOperator OpNode,
                            X86FoldableSchedWrite sched, Predicate prd> {
  let Predicates = [prd] in
    def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
               [(set KRC:$dst, (OpNode KRC:$src))]>,
             Sched<[sched]>;
}

multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
                                SDPatternOperator OpNode,
                                X86FoldableSchedWrite sched> {
  defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
                            sched, HasDQI>, VEX, PD;
  defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
                            sched, HasAVX512>, VEX, PS;
  defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
                            sched, HasBWI>, VEX, PD, VEX_W;
  defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
                            sched, HasBWI>, VEX, PS, VEX_W;
}

// TODO - do we need a X86SchedWriteWidths::KMASK type?
2954defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SchedWriteVecLogic.XMM>; 2955 2956// KNL does not support KMOVB, 8-bit mask is promoted to 16-bit 2957let Predicates = [HasAVX512, NoDQI] in 2958def : Pat<(vnot VK8:$src), 2959 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>; 2960 2961def : Pat<(vnot VK4:$src), 2962 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>; 2963def : Pat<(vnot VK2:$src), 2964 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>; 2965def : Pat<(vnot VK1:$src), 2966 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK1:$src, VK16)), VK2)>; 2967 2968// Mask binary operation 2969// - KAND, KANDN, KOR, KXNOR, KXOR 2970multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr, 2971 RegisterClass KRC, SDPatternOperator OpNode, 2972 X86FoldableSchedWrite sched, Predicate prd, 2973 bit IsCommutable> { 2974 let Predicates = [prd], isCommutable = IsCommutable in 2975 def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2), 2976 !strconcat(OpcodeStr, 2977 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 2978 [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>, 2979 Sched<[sched]>; 2980} 2981 2982multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr, 2983 SDPatternOperator OpNode, 2984 X86FoldableSchedWrite sched, bit IsCommutable, 2985 Predicate prdW = HasAVX512> { 2986 defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode, 2987 sched, HasDQI, IsCommutable>, VEX_4V, VEX_L, PD; 2988 defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode, 2989 sched, prdW, IsCommutable>, VEX_4V, VEX_L, PS; 2990 defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode, 2991 sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PD; 2992 defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode, 2993 sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PS; 2994} 2995 2996// These nodes use 'vnot' instead of 'not' to support vectors. 
def vandn : PatFrag<(ops node:$i0, node:$i1), (and (vnot node:$i0), node:$i1)>;
def vxnor : PatFrag<(ops node:$i0, node:$i1), (vnot (xor node:$i0, node:$i1))>;

// TODO - do we need a X86SchedWriteWidths::KMASK type?
defm KAND  : avx512_mask_binop_all<0x41, "kand",  and,     SchedWriteVecLogic.XMM, 1>;
defm KOR   : avx512_mask_binop_all<0x45, "kor",   or,      SchedWriteVecLogic.XMM, 1>;
defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor,   SchedWriteVecLogic.XMM, 1>;
defm KXOR  : avx512_mask_binop_all<0x47, "kxor",  xor,     SchedWriteVecLogic.XMM, 1>;
defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn,   SchedWriteVecLogic.XMM, 0>;
defm KADD  : avx512_mask_binop_all<0x4A, "kadd",  X86kadd, SchedWriteVecLogic.XMM, 1, HasDQI>;

// Lower narrow-mask logic ops by promoting the operands to VK16 and using the
// word-sized instruction, then re-classing the result.
multiclass avx512_binop_pat<SDPatternOperator VOpNode,
                            Instruction Inst> {
  // With AVX512F, 8-bit mask is promoted to 16-bit mask,
  // for the DQI set, this type is legal and KxxxB instruction is used
  let Predicates = [NoDQI] in
  def : Pat<(VOpNode VK8:$src1, VK8:$src2),
            (COPY_TO_REGCLASS
              (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
                    (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;

  // All types smaller than 8 bits require conversion anyway
  def : Pat<(VOpNode VK1:$src1, VK1:$src2),
        (COPY_TO_REGCLASS (Inst
                           (COPY_TO_REGCLASS VK1:$src1, VK16),
                           (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
  def : Pat<(VOpNode VK2:$src1, VK2:$src2),
        (COPY_TO_REGCLASS (Inst
                           (COPY_TO_REGCLASS VK2:$src1, VK16),
                           (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>;
  def : Pat<(VOpNode VK4:$src1, VK4:$src2),
        (COPY_TO_REGCLASS (Inst
                           (COPY_TO_REGCLASS VK4:$src1, VK16),
                           (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>;
}

defm : avx512_binop_pat<and,   KANDWrr>;
defm : avx512_binop_pat<vandn, KANDNWrr>;
defm : avx512_binop_pat<or,    KORWrr>;
defm : avx512_binop_pat<vxnor, KXNORWrr>;
defm : avx512_binop_pat<xor,   KXORWrr>;

// Mask unpacking
multiclass avx512_mask_unpck<string Suffix, X86KVectorVTInfo Dst,
                             X86KVectorVTInfo Src, X86FoldableSchedWrite sched,
                             Predicate prd> {
  let Predicates = [prd] in {
    let hasSideEffects = 0 in
    def rr : I<0x4b, MRMSrcReg, (outs Dst.KRC:$dst),
               (ins Src.KRC:$src1, Src.KRC:$src2),
               "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
               VEX_4V, VEX_L, Sched<[sched]>;

    // KUNPCK puts $src1 in the high half, so the concat operands are swapped.
    def : Pat<(Dst.KVT (concat_vectors Src.KRC:$src1, Src.KRC:$src2)),
              (!cast<Instruction>(NAME#rr) Src.KRC:$src2, Src.KRC:$src1)>;
  }
}

defm KUNPCKBW : avx512_mask_unpck<"bw", v16i1_info, v8i1_info,  WriteShuffle, HasAVX512>, PD;
defm KUNPCKWD : avx512_mask_unpck<"wd", v32i1_info, v16i1_info, WriteShuffle, HasBWI>, PS;
defm KUNPCKDQ : avx512_mask_unpck<"dq", v64i1_info, v32i1_info, WriteShuffle, HasBWI>, PS, VEX_W;

// Mask bit testing
multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                              SDNode OpNode, X86FoldableSchedWrite sched,
                              Predicate prd> {
  // Compare-only: writes EFLAGS, no mask result.
  let Predicates = [prd], Defs = [EFLAGS] in
  def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
             [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>,
           Sched<[sched]>;
}

multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86FoldableSchedWrite sched,
                                Predicate prdW = HasAVX512> {
  defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, sched, HasDQI>,
                                                                        VEX, PD;
  defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, sched, prdW>,
                                                                        VEX, PS;
  defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, sched, HasBWI>,
                                                                 VEX, PS, VEX_W;
  defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, sched, HasBWI>,
                                                                 VEX, PD, VEX_W;
}

// TODO - do we need a X86SchedWriteWidths::KMASK type?
defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SchedWriteVecLogic.XMM>;
defm KTEST : avx512_mask_testop_w<0x99, "ktest", X86ktest, SchedWriteVecLogic.XMM, HasDQI>;

// Mask shift
// KSHIFTL/KSHIFTR with an 8-bit immediate shift amount.
multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                               SDNode OpNode, X86FoldableSchedWrite sched> {
  let Predicates = [HasAVX512] in
  def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
               !strconcat(OpcodeStr,
                          "\t{$imm, $src, $dst|$dst, $src, $imm}"),
               [(set KRC:$dst, (OpNode KRC:$src, (i8 timm:$imm)))]>,
               Sched<[sched]>;
}

// Width variants: W and B share opc1 (B drops VEX_W), Q and D share opc2
// (D drops VEX_W). B requires DQI; D/Q require BWI.
multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
                                 SDNode OpNode, X86FoldableSchedWrite sched> {
  defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode,
                               sched>, VEX, TAPD, VEX_W;
  let Predicates = [HasDQI] in
  defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode,
                               sched>, VEX, TAPD;
  let Predicates = [HasBWI] in {
  defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode,
                               sched>, VEX, TAPD, VEX_W;
  defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode,
                               sched>, VEX, TAPD;
  }
}

defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, WriteShuffle>;
defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, WriteShuffle>;

// Patterns for comparing 128/256-bit integer vectors using 512-bit instruction.
// Lower a narrow (128/256-bit) integer compare-with-predicate to the 512-bit
// VPCMP* instruction when VLX is unavailable: the operands are widened with
// INSERT_SUBREG into an IMPLICIT_DEF ZMM, and the resulting wide mask is
// copied back to the narrow mask class. The upper (undefined) compare results
// land in mask bits that the narrow mask class ignores.
// NOTE(review): multiclass name is spelled "axv512" (not "avx512") — kept
// as-is since renaming would break users of this multiclass.
multiclass axv512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
                                                 string InstStr,
                                                 X86VectorVTInfo Narrow,
                                                 X86VectorVTInfo Wide> {
// Register-register compare.
def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
                                (Narrow.VT Narrow.RC:$src2), cond)),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr#"Zrri")
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
            (Frag.OperandTransform $cc)), Narrow.KRC)>;

// Masked compare: fold an 'and' with a mask into the instruction's
// zeroing-mask operand (uses the single-use _su fragment).
def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                           (Narrow.KVT (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
                                                    (Narrow.VT Narrow.RC:$src2),
                                                    cond)))),
          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
           (Frag_su.OperandTransform $cc)), Narrow.KRC)>;
}

// Same lowering for compares against a broadcast memory operand, including
// the commuted forms (broadcast on the LHS), which use the CommFrag fragments
// so the condition code can be transformed to match the swapped operands.
multiclass axv512_icmp_packed_cc_rmb_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
                                                     PatFrag CommFrag, PatFrag CommFrag_su,
                                                     string InstStr,
                                                     X86VectorVTInfo Narrow,
                                                     X86VectorVTInfo Wide> {
// Broadcast load.
def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
                                (Narrow.BroadcastLdFrag addr:$src2), cond)),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr#"Zrmib")
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            addr:$src2, (Frag.OperandTransform $cc)), Narrow.KRC)>;

def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                           (Narrow.KVT
                            (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
                                         (Narrow.BroadcastLdFrag addr:$src2),
                                         cond)))),
          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
           addr:$src2, (Frag_su.OperandTransform $cc)), Narrow.KRC)>;

// Commuted with broadcast load.
def : Pat<(Narrow.KVT (CommFrag:$cc (Narrow.BroadcastLdFrag addr:$src2),
                                    (Narrow.VT Narrow.RC:$src1),
                                    cond)),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr#"Zrmib")
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            addr:$src2, (CommFrag.OperandTransform $cc)), Narrow.KRC)>;

def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                           (Narrow.KVT
                            (CommFrag_su:$cc (Narrow.BroadcastLdFrag addr:$src2),
                                             (Narrow.VT Narrow.RC:$src1),
                                             cond)))),
          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
           addr:$src2, (CommFrag_su.OperandTransform $cc)), Narrow.KRC)>;
}

// Same as above, but for fp types which don't use PatFrags.
// FP analogue of the integer no-VLX compare lowering above: match the
// X86cmpm / X86cmpm_su nodes directly (immediate condition code via timm)
// and widen to the 512-bit VCMPP* instruction. Commuted broadcast forms use
// X86cmpm_imm_commute to fix up the condition code.
// NOTE(review): name is spelled "axv512", matching its sibling multiclasses.
multiclass axv512_cmp_packed_cc_no_vlx_lowering<string InstStr,
                                                X86VectorVTInfo Narrow,
                                                X86VectorVTInfo Wide> {
// Register-register compare.
def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
                               (Narrow.VT Narrow.RC:$src2), timm:$cc)),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr#"Zrri")
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
            timm:$cc), Narrow.KRC)>;

// Masked compare: fold the 'and' into the zeroing-mask operand.
def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                           (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
                                       (Narrow.VT Narrow.RC:$src2), timm:$cc))),
          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
           timm:$cc), Narrow.KRC)>;

// Broadcast load.
def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
                               (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc)),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr#"Zrmbi")
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            addr:$src2, timm:$cc), Narrow.KRC)>;

def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                           (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
                                       (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc))),
          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
           addr:$src2, timm:$cc), Narrow.KRC)>;

// Commuted with broadcast load.
def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
                               (Narrow.VT Narrow.RC:$src1), timm:$cc)),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr#"Zrmbi")
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;

def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                           (X86cmpm_su (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
                                       (Narrow.VT Narrow.RC:$src1), timm:$cc))),
          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
           addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
}

// Instantiate the no-VLX lowerings: D/Q element compares only need AVX512F.
let Predicates = [HasAVX512, NoVLX] in {
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;

  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPD", v8i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUD", v8i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPD", v4i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUD", v4i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPQ", v4i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUQ", v4i64x_info, v8i64_info>;

  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPQ", v2i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUQ", v2i64x_info, v8i64_info>;

  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v8f32x_info, v16f32_info>;
  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v4f32x_info, v16f32_info>;
  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v4f64x_info, v8f64_info>;
  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v2f64x_info, v8f64_info>;
}

// B/W element compares additionally require BWI.
let Predicates = [HasBWI, NoVLX] in {
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v32i8x_info, v64i8_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v32i8x_info, v64i8_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v16i8x_info, v64i8_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v16i8x_info, v64i8_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v16i16x_info, v32i16_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v16i16x_info, v32i16_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v8i16x_info, v32i16_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v8i16x_info, v32i16_info>;
}

// Mask setting all 0s or 1s
// Pseudos that materialize an all-zeros / all-ones mask; expanded later
// (rematerializable and as cheap as a move).
multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> {
  let Predicates = [HasAVX512] in
  let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1,
      SchedRW = [WriteZero] in
  def NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
                [(set KRC:$dst, (VT Val))]>;
}

multiclass avx512_mask_setop_w<PatFrag Val> {
  defm W : avx512_mask_setop<VK16, v16i1, Val>;
  defm D : avx512_mask_setop<VK32, v32i1, Val>;
  defm Q : avx512_mask_setop<VK64, v64i1, Val>;
}

defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
defm KSET1 : avx512_mask_setop_w<immAllOnesV>;

// With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
// Narrow all-zeros/all-ones mask constants are materialized via the 16-bit
// KSET0W/KSET1W pseudos and a regclass copy.
let Predicates = [HasAVX512] in {
  def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
  def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>;
  def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>;
  def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>;
  def : Pat<(v8i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK8)>;
  def : Pat<(v4i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK4)>;
  def : Pat<(v2i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK2)>;
  def : Pat<(v1i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK1)>;
}

// Patterns for kmask insert_subvector/extract_subvector to/from index=0
// Both directions are just regclass copies since the low bits line up.
multiclass operation_subvector_mask_lowering<RegisterClass subRC, ValueType subVT,
                                             RegisterClass RC, ValueType VT> {
  def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
            (subVT (COPY_TO_REGCLASS RC:$src, subRC))>;

  def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
            (VT (COPY_TO_REGCLASS subRC:$src, RC))>;
}
defm : operation_subvector_mask_lowering<VK1, v1i1, VK2, v2i1>;
defm : operation_subvector_mask_lowering<VK1, v1i1, VK4, v4i1>;
defm : operation_subvector_mask_lowering<VK1, v1i1, VK8, v8i1>;
defm : operation_subvector_mask_lowering<VK1, v1i1, VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK1, v1i1, VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK1, v1i1, VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK2, v2i1, VK4, v4i1>;
defm : operation_subvector_mask_lowering<VK2, v2i1, VK8, v8i1>;
defm : operation_subvector_mask_lowering<VK2, v2i1, VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK2, v2i1, VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK2, v2i1, VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK4, v4i1, VK8, v8i1>;
defm : operation_subvector_mask_lowering<VK4, v4i1, VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK4, v4i1, VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK4, v4i1, VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK8, v8i1, VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK8, v8i1, VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK8, v8i1, VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK16, v16i1, VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>;

//===----------------------------------------------------------------------===//
// AVX-512 - Aligned and unaligned load and store
//

// One vector load for a single width: plain, merge-masked (k) and
// zero-masked (kz) register/memory forms, plus masked_load fold patterns.
// NoRMPattern suppresses the rm ISel pattern (used when another instruction
// already covers it); SelectOprr lets callers disable the reg-reg select
// patterns by passing null_frag.
multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name,
                       X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload,
                       X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
                       bit NoRMPattern = 0,
                       SDPatternOperator SelectOprr = vselect> {
  let hasSideEffects = 0 in {
  let isMoveReg = 1 in
  def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
                    _.ExeDomain>, EVEX, Sched<[Sched.RR]>,
                    EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
  def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
                      (ins _.KRCWM:$mask,  _.RC:$src),
                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
                       "${dst} {${mask}} {z}, $src}"),
                       [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
                                           (_.VT _.RC:$src),
                                           _.ImmAllZerosV)))], _.ExeDomain>,
                       EVEX, EVEX_KZ, Sched<[Sched.RR]>;

  let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1 in
  def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    !if(NoRMPattern, [],
                        [(set _.RC:$dst,
                          (_.VT (ld_frag addr:$src)))]),
                    _.ExeDomain>, EVEX, Sched<[Sched.RM]>,
                    EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;

  // Merge-masked forms tie $src0 to $dst.
  let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in {
    def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
                      (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1),
                      !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
                      "${dst} {${mask}}, $src1}"),
                      [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
                                          (_.VT _.RC:$src1),
                                          (_.VT _.RC:$src0))))], _.ExeDomain>,
                       EVEX, EVEX_K, Sched<[Sched.RR]>;
    def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
                     (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1),
                     !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
                      "${dst} {${mask}}, $src1}"),
                      [(set _.RC:$dst, (_.VT
                          (vselect_mask _.KRCWM:$mask,
                           (_.VT (ld_frag addr:$src1)),
                           (_.VT _.RC:$src0))))], _.ExeDomain>,
                      EVEX, EVEX_K, Sched<[Sched.RM]>;
  }
  def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
                      (ins _.KRCWM:$mask, _.MemOp:$src),
                      OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"#
                                "${dst} {${mask}} {z}, $src}",
                      [(set _.RC:$dst, (_.VT (vselect_mask _.KRCWM:$mask,
                        (_.VT (ld_frag addr:$src)), _.ImmAllZerosV)))],
                      _.ExeDomain>, EVEX, EVEX_KZ, Sched<[Sched.RM]>;
  }
  // Fold masked loads (undef or zero passthru -> kz form; register
  // passthru -> k form).
  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
            (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>;

  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>;

  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))),
            (!cast<Instruction>(Name#_.ZSuffix#rmk) _.RC:$src0,
             _.KRCWM:$mask, addr:$ptr)>;
}

// Aligned-load variants across the three vector widths; 256/128-bit forms
// additionally require VLX.
multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
                                 AVX512VLVectorVTInfo _, Predicate prd,
                                 X86SchedWriteMoveLSWidths Sched,
                                 string EVEX2VEXOvrd, bit NoRMPattern = 0> {
  let Predicates  = [prd] in
  defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512,
                       _.info512.AlignedLdFrag, masked_load_aligned,
                       Sched.ZMM, "",
NoRMPattern>, EVEX_V512;

  let Predicates  = [prd, HasVLX] in {
  defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256,
                          _.info256.AlignedLdFrag, masked_load_aligned,
                          Sched.YMM, EVEX2VEXOvrd#"Y", NoRMPattern>, EVEX_V256;
  defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128,
                          _.info128.AlignedLdFrag, masked_load_aligned,
                          Sched.XMM, EVEX2VEXOvrd, NoRMPattern>, EVEX_V128;
  }
}

// Unaligned-load variants across the three vector widths.
multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
                          AVX512VLVectorVTInfo _, Predicate prd,
                          X86SchedWriteMoveLSWidths Sched,
                          string EVEX2VEXOvrd, bit NoRMPattern = 0,
                          SDPatternOperator SelectOprr = vselect> {
  let Predicates  = [prd] in
  defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512, _.info512.LdFrag,
                       masked_load, Sched.ZMM, "",
                       NoRMPattern, SelectOprr>, EVEX_V512;

  let Predicates  = [prd, HasVLX] in {
  defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256, _.info256.LdFrag,
                          masked_load, Sched.YMM, EVEX2VEXOvrd#"Y",
                          NoRMPattern, SelectOprr>, EVEX_V256;
  defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128, _.info128.LdFrag,
                          masked_load, Sched.XMM, EVEX2VEXOvrd,
                          NoRMPattern, SelectOprr>, EVEX_V128;
  }
}

// One vector store for a single width: mr/mrk memory forms, the
// reversed-encoding reg-reg forms (_REV, disassembly only) with ".s"
// assembler aliases, and the masked_store fold pattern.
multiclass avx512_store<bits<8> opc, string OpcodeStr, string BaseName,
                        X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore,
                        X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
                        bit NoMRPattern = 0> {
  let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
  let isMoveReg = 1 in
  def rr_REV  : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
                         OpcodeStr # "\t{$src, $dst|$dst, $src}",
                         [], _.ExeDomain>, EVEX,
                         FoldGenData<BaseName#_.ZSuffix#rr>, Sched<[Sched.RR]>,
                         EVEX2VEXOverride<EVEX2VEXOvrd#"rr_REV">;
  def rrk_REV : AVX512PI<opc, MRMDestReg, (outs  _.RC:$dst),
                         (ins _.KRCWM:$mask, _.RC:$src),
                         OpcodeStr # "\t{$src, ${dst} {${mask}}|"#
                         "${dst} {${mask}}, $src}",
                         [], _.ExeDomain>, EVEX, EVEX_K,
                         FoldGenData<BaseName#_.ZSuffix#rrk>,
                         Sched<[Sched.RR]>;
  def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs  _.RC:$dst),
                          (ins _.KRCWM:$mask, _.RC:$src),
                          OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" #
                          "${dst} {${mask}} {z}, $src}",
                          [], _.ExeDomain>, EVEX, EVEX_KZ,
                          FoldGenData<BaseName#_.ZSuffix#rrkz>,
                          Sched<[Sched.RR]>;
  }

  let hasSideEffects = 0, mayStore = 1 in
  def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    !if(NoMRPattern, [],
                        [(st_frag (_.VT _.RC:$src), addr:$dst)]),
                    _.ExeDomain>, EVEX, Sched<[Sched.MR]>,
                    EVEX2VEXOverride<EVEX2VEXOvrd#"mr">;
  def mrk : AVX512PI<opc, MRMDestMem, (outs),
                     (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
              OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
               [], _.ExeDomain>, EVEX, EVEX_K, Sched<[Sched.MR]>,
               NotMemoryFoldable;

  def: Pat<(mstore (_.VT _.RC:$src), addr:$ptr, _.KRCWM:$mask),
           (!cast<Instruction>(BaseName#_.ZSuffix#mrk) addr:$ptr,
                                                       _.KRCWM:$mask, _.RC:$src)>;

  def : InstAlias<OpcodeStr#".s\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(BaseName#_.ZSuffix#"rr_REV")
                   _.RC:$dst, _.RC:$src), 0>;
  def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
                  (!cast<Instruction>(BaseName#_.ZSuffix#"rrk_REV")
                   _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
  def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}",
                  (!cast<Instruction>(BaseName#_.ZSuffix#"rrkz_REV")
                   _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
}

// Unaligned-store variants across the three vector widths.
multiclass avx512_store_vl< bits<8> opc, string OpcodeStr,
                            AVX512VLVectorVTInfo _, Predicate prd,
                            X86SchedWriteMoveLSWidths Sched,
                            string EVEX2VEXOvrd, bit NoMRPattern = 0> {
  let Predicates  = [prd] in
  defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, store,
                        masked_store, Sched.ZMM, "",
                        NoMRPattern>, EVEX_V512;
  let Predicates  = [prd, HasVLX] in {
    defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, store,
                             masked_store, Sched.YMM,
                             EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
    defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, store,
                             masked_store, Sched.XMM, EVEX2VEXOvrd,
                             NoMRPattern>, EVEX_V128;
  }
}

// Aligned-store variants across the three vector widths.
multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _, Predicate prd,
                                  X86SchedWriteMoveLSWidths Sched,
                                  string EVEX2VEXOvrd, bit NoMRPattern = 0> {
  let Predicates = [prd] in
  defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, alignedstore,
                        masked_store_aligned, Sched.ZMM, "",
                        NoMRPattern>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, alignedstore,
                             masked_store_aligned, Sched.YMM,
                             EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
    defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, alignedstore,
                             masked_store_aligned, Sched.XMM, EVEX2VEXOvrd,
                             NoMRPattern>, EVEX_V128;
  }
}

// The FP and integer move instructions. The integer DQU/DQA forms pass
// NoRMPattern/NoMRPattern = 1 where a sibling instruction already provides
// the plain load/store pattern.
defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info,
                                     HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
               avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info,
                                      HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
               PS, EVEX_CD8<32, CD8VF>;

defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info,
                                     HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
               avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info,
                                      HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
               PD, VEX_W, EVEX_CD8<64, CD8VF>;

defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512,
                              SchedWriteFMoveLS, "VMOVUPS", 0, null_frag>,
               avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512,
                               SchedWriteFMoveLS, "VMOVUPS">,
               PS, EVEX_CD8<32, CD8VF>;

defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512,
                              SchedWriteFMoveLS, "VMOVUPD", 0, null_frag>,
               avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512,
                               SchedWriteFMoveLS, "VMOVUPD">,
               PD, VEX_W, EVEX_CD8<64, CD8VF>;

defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info,
                                       HasAVX512, SchedWriteVecMoveLS,
                                       "VMOVDQA", 1>,
                 avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info,
                                        HasAVX512, SchedWriteVecMoveLS,
                                        "VMOVDQA", 1>,
                 PD, EVEX_CD8<32, CD8VF>;

defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info,
                                       HasAVX512, SchedWriteVecMoveLS,
                                       "VMOVDQA">,
                 avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info,
                                        HasAVX512, SchedWriteVecMoveLS,
                                        "VMOVDQA">,
                 PD, VEX_W, EVEX_CD8<64, CD8VF>;

defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI,
                               SchedWriteVecMoveLS, "VMOVDQU", 1>,
                avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, HasBWI,
                                SchedWriteVecMoveLS, "VMOVDQU", 1>,
                XD, EVEX_CD8<8, CD8VF>;

defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI,
                                SchedWriteVecMoveLS, "VMOVDQU", 1>,
                 avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, HasBWI,
                                 SchedWriteVecMoveLS, "VMOVDQU", 1>,
                 XD, VEX_W, EVEX_CD8<16, CD8VF>;

defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
                                SchedWriteVecMoveLS, "VMOVDQU", 1, null_frag>,
                 avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
                                 SchedWriteVecMoveLS, "VMOVDQU", 1>,
                 XS, EVEX_CD8<32, CD8VF>;

defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
                                SchedWriteVecMoveLS, "VMOVDQU", 0, null_frag>,
                 avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
                                 SchedWriteVecMoveLS, "VMOVDQU">,
                 XS, VEX_W, EVEX_CD8<64, CD8VF>;

// Special instructions to help with spilling when we don't have VLX. We need
// to load or store from a ZMM register instead. These are converted in
// expandPostRAPseudos.
let isReMaterializable = 1, canFoldAsLoad = 1,
    isPseudo = 1, mayLoad = 1, hasSideEffects = 0 in {
def VMOVAPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
                            "", []>, Sched<[WriteFLoadX]>;
def VMOVAPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
                            "", []>, Sched<[WriteFLoadY]>;
def VMOVUPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
                            "", []>, Sched<[WriteFLoadX]>;
def VMOVUPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
                            "", []>, Sched<[WriteFLoadY]>;
}

let isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
def VMOVAPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
                            "", []>, Sched<[WriteFStoreX]>;
def VMOVAPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
                            "", []>, Sched<[WriteFStoreY]>;
def VMOVUPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
                            "", []>, Sched<[WriteFStoreX]>;
def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
                            "", []>, Sched<[WriteFStoreY]>;
}

// vselect(mask, 0, x) == vselect(~mask, x, 0): invert the mask with KNOT and
// use the zero-masked move.
def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 immAllZerosV),
                          (v8i64 VR512:$src))),
   (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
                    VK8), VR512:$src)>;

def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
                           (v16i32 VR512:$src))),
                  (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;

// These patterns exist to prevent the above patterns from introducing a second
// mask inversion when one already exists.
// If the mask is already inverted (xor with all-ones), consume the xor
// directly instead of emitting another KNOT.
def : Pat<(v8i64 (vselect (xor VK8:$mask, (v8i1 immAllOnesV)),
                          (v8i64 immAllZerosV),
                          (v8i64 VR512:$src))),
                 (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>;
def : Pat<(v16i32 (vselect (xor VK16:$mask, (v16i1 immAllOnesV)),
                           (v16i32 immAllZerosV),
                           (v16i32 VR512:$src))),
                  (VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;

// Lower a masked select on a narrow (128/256-bit) vector without VLX: widen
// both values and the mask, perform the 512-bit merge/zero-masked move, then
// extract the narrow result.
multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow,
                              X86VectorVTInfo Wide> {
 // Merge-masked: vselect(mask, src1, src0).
 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
                               Narrow.RC:$src1, Narrow.RC:$src0)),
           (EXTRACT_SUBREG
            (Wide.VT
             (!cast<Instruction>(InstrStr#"rrk")
              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src0, Narrow.SubRegIdx)),
              (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
            Narrow.SubRegIdx)>;

 // Zero-masked: vselect(mask, src1, 0).
 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
                               Narrow.RC:$src1, Narrow.ImmAllZerosV)),
           (EXTRACT_SUBREG
            (Wide.VT
             (!cast<Instruction>(InstrStr#"rrkz")
              (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
            Narrow.SubRegIdx)>;
}

// Patterns for handling v8i1 selects of 256-bit vectors when VLX isn't
// available. Use a 512-bit operation and extract.
// Instantiate the no-VLX masked-select lowering for each narrow type.
let Predicates = [HasAVX512, NoVLX] in {
  defm : mask_move_lowering<"VMOVAPSZ", v4f32x_info, v16f32_info>;
  defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>;
  defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>;
  defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>;

  defm : mask_move_lowering<"VMOVAPDZ", v2f64x_info, v8f64_info>;
  defm : mask_move_lowering<"VMOVDQA64Z", v2i64x_info, v8i64_info>;
  defm : mask_move_lowering<"VMOVAPDZ", v4f64x_info, v8f64_info>;
  defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>;
}

let Predicates = [HasBWI, NoVLX] in {
  defm : mask_move_lowering<"VMOVDQU8Z", v16i8x_info, v64i8_info>;
  defm : mask_move_lowering<"VMOVDQU8Z", v32i8x_info, v64i8_info>;

  defm : mask_move_lowering<"VMOVDQU16Z", v8i16x_info, v32i16_info>;
  defm : mask_move_lowering<"VMOVDQU16Z", v16i16x_info, v32i16_info>;
}

// Plain (unmasked) i32/i16/i8 loads and stores are all lowered through the
// 64-bit-element move instructions; element size is irrelevant without
// masking.
let Predicates = [HasAVX512] in {
  // 512-bit load.
  def : Pat<(alignedloadv16i32 addr:$src),
            (VMOVDQA64Zrm addr:$src)>;
  def : Pat<(alignedloadv32i16 addr:$src),
            (VMOVDQA64Zrm addr:$src)>;
  def : Pat<(alignedloadv64i8 addr:$src),
            (VMOVDQA64Zrm addr:$src)>;
  def : Pat<(loadv16i32 addr:$src),
            (VMOVDQU64Zrm addr:$src)>;
  def : Pat<(loadv32i16 addr:$src),
            (VMOVDQU64Zrm addr:$src)>;
  def : Pat<(loadv64i8 addr:$src),
            (VMOVDQU64Zrm addr:$src)>;

  // 512-bit store.
  def : Pat<(alignedstore (v16i32 VR512:$src), addr:$dst),
            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst),
            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst),
            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v16i32 VR512:$src), addr:$dst),
            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v32i16 VR512:$src), addr:$dst),
            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v64i8 VR512:$src), addr:$dst),
            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
}

let Predicates = [HasVLX] in {
  // 128-bit load.
  def : Pat<(alignedloadv4i32 addr:$src),
            (VMOVDQA64Z128rm addr:$src)>;
  def : Pat<(alignedloadv8i16 addr:$src),
            (VMOVDQA64Z128rm addr:$src)>;
  def : Pat<(alignedloadv16i8 addr:$src),
            (VMOVDQA64Z128rm addr:$src)>;
  def : Pat<(loadv4i32 addr:$src),
            (VMOVDQU64Z128rm addr:$src)>;
  def : Pat<(loadv8i16 addr:$src),
            (VMOVDQU64Z128rm addr:$src)>;
  def : Pat<(loadv16i8 addr:$src),
            (VMOVDQU64Z128rm addr:$src)>;

  // 128-bit store.
  def : Pat<(alignedstore (v4i32 VR128X:$src), addr:$dst),
            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst),
            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst),
            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v4i32 VR128X:$src), addr:$dst),
            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v8i16 VR128X:$src), addr:$dst),
            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v16i8 VR128X:$src), addr:$dst),
            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;

  // 256-bit load.
  def : Pat<(alignedloadv8i32 addr:$src),
            (VMOVDQA64Z256rm addr:$src)>;
  def : Pat<(alignedloadv16i16 addr:$src),
            (VMOVDQA64Z256rm addr:$src)>;
  def : Pat<(alignedloadv32i8 addr:$src),
            (VMOVDQA64Z256rm addr:$src)>;
  def : Pat<(loadv8i32 addr:$src),
            (VMOVDQU64Z256rm addr:$src)>;
  def : Pat<(loadv16i16 addr:$src),
            (VMOVDQU64Z256rm addr:$src)>;
  def : Pat<(loadv32i8 addr:$src),
            (VMOVDQU64Z256rm addr:$src)>;

  // 256-bit store.
  def : Pat<(alignedstore (v8i32 VR256X:$src), addr:$dst),
            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst),
            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst),
            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v8i32 VR256X:$src), addr:$dst),
            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v16i16 VR256X:$src), addr:$dst),
            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v32i8 VR256X:$src), addr:$dst),
            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
}

// Move Int Doubleword to Packed Double Int
//
let ExeDomain = SSEPackedInt in {
// GPR -> low element of an XMM register (scalar_to_vector).
def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(set VR128X:$dst,
                        (v4i32 (scalar_to_vector GR32:$src)))]>,
                        EVEX, Sched<[WriteVecMoveFromGpr]>;
def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(set VR128X:$dst,
                        (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
                        EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>;
def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
                       "vmovq\t{$src, $dst|$dst, $src}",
                       [(set VR128X:$dst,
                         (v2i64 (scalar_to_vector GR64:$src)))]>,
                       EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
// Memory form kept for the disassembler only (no ISel pattern).
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
                      (ins i64mem:$src),
                      "vmovq\t{$src, $dst|$dst, $src}", []>,
                      EVEX, VEX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecLoad]>;
// GPR <-> scalar FP register bitcasts (codegen-only, no distinct asm).
let isCodeGenOnly = 1 in {
def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src),
                       "vmovq\t{$src, $dst|$dst, $src}",
                       [(set FR64X:$dst, (bitconvert GR64:$src))]>,
                       EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src),
                         "vmovq\t{$src, $dst|$dst, $src}",
                         [(set GR64:$dst, (bitconvert FR64X:$src))]>,
                         EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
}
} // ExeDomain = SSEPackedInt

// Move Int Doubleword to Single Scalar
//
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
def VMOVDI2SSZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(set FR32X:$dst, (bitconvert GR32:$src))]>,
                      EVEX, Sched<[WriteVecMoveFromGpr]>;
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1

// Move doubleword from xmm register to r/m32
//
let ExeDomain = SSEPackedInt in {
// Extract element 0 of a v4i32 into a GPR or directly to memory.
def VMOVPDI2DIZrr  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
                       "vmovd\t{$src, $dst|$dst, $src}",
                       [(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
                                         (iPTR 0)))]>,
                       EVEX, Sched<[WriteVecMoveToGpr]>;
def VMOVPDI2DIZmr  : AVX512BI<0x7E, MRMDestMem, (outs),
                       (ins i32mem:$dst, VR128X:$src),
                       "vmovd\t{$src, $dst|$dst, $src}",
                       [(store (i32 (extractelt (v4i32 VR128X:$src),
                                     (iPTR 0))), addr:$dst)]>,
                       EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>;
} // ExeDomain = SSEPackedInt

// Move quadword from xmm1 register to r/m64
//
let ExeDomain = SSEPackedInt in {
def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins
VR128X:$src), 3856 "vmovq\t{$src, $dst|$dst, $src}", 3857 [(set GR64:$dst, (extractelt (v2i64 VR128X:$src), 3858 (iPTR 0)))]>, 3859 PD, EVEX, VEX_W, Sched<[WriteVecMoveToGpr]>, 3860 Requires<[HasAVX512]>; 3861 3862let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in 3863def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src), 3864 "vmovq\t{$src, $dst|$dst, $src}", []>, PD, 3865 EVEX, VEX_W, Sched<[WriteVecStore]>, 3866 Requires<[HasAVX512, In64BitMode]>; 3867 3868def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs), 3869 (ins i64mem:$dst, VR128X:$src), 3870 "vmovq\t{$src, $dst|$dst, $src}", 3871 [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)), 3872 addr:$dst)]>, 3873 EVEX, PD, VEX_W, EVEX_CD8<64, CD8VT1>, 3874 Sched<[WriteVecStore]>, Requires<[HasAVX512]>; 3875 3876let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in 3877def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst), 3878 (ins VR128X:$src), 3879 "vmovq\t{$src, $dst|$dst, $src}", []>, 3880 EVEX, VEX_W, Sched<[SchedWriteVecLogic.XMM]>; 3881} // ExeDomain = SSEPackedInt 3882 3883def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}", 3884 (VMOVPQI2QIZrr VR128X:$dst, VR128X:$src), 0>; 3885 3886let Predicates = [HasAVX512] in { 3887 def : Pat<(X86vextractstore64 (v2i64 VR128X:$src), addr:$dst), 3888 (VMOVPQI2QIZmr addr:$dst, VR128X:$src)>; 3889} 3890 3891// Move Scalar Single to Double Int 3892// 3893let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in { 3894def VMOVSS2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), 3895 (ins FR32X:$src), 3896 "vmovd\t{$src, $dst|$dst, $src}", 3897 [(set GR32:$dst, (bitconvert FR32X:$src))]>, 3898 EVEX, Sched<[WriteVecMoveToGpr]>; 3899} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1 3900 3901// Move Quadword Int to Packed Quadword Int 3902// 3903let ExeDomain = SSEPackedInt in { 3904def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst), 3905 (ins i64mem:$src), 3906 "vmovq\t{$src, $dst|$dst, 
$src}", 3907 [(set VR128X:$dst, 3908 (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, 3909 EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>; 3910} // ExeDomain = SSEPackedInt 3911 3912// Allow "vmovd" but print "vmovq". 3913def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}", 3914 (VMOV64toPQIZrr VR128X:$dst, GR64:$src), 0>; 3915def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}", 3916 (VMOVPQIto64Zrr GR64:$dst, VR128X:$src), 0>; 3917 3918// Conversions between masks and scalar fp. 3919def : Pat<(v32i1 (bitconvert FR32X:$src)), 3920 (KMOVDkr (VMOVSS2DIZrr FR32X:$src))>; 3921def : Pat<(f32 (bitconvert VK32:$src)), 3922 (VMOVDI2SSZrr (KMOVDrk VK32:$src))>; 3923 3924def : Pat<(v64i1 (bitconvert FR64X:$src)), 3925 (KMOVQkr (VMOVSDto64Zrr FR64X:$src))>; 3926def : Pat<(f64 (bitconvert VK64:$src)), 3927 (VMOV64toSDZrr (KMOVQrk VK64:$src))>; 3928 3929//===----------------------------------------------------------------------===// 3930// AVX-512 MOVSS, MOVSD 3931//===----------------------------------------------------------------------===// 3932 3933multiclass avx512_move_scalar<string asm, SDNode OpNode, PatFrag vzload_frag, 3934 X86VectorVTInfo _> { 3935 let Predicates = [HasAVX512, OptForSize] in 3936 def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst), 3937 (ins _.RC:$src1, _.RC:$src2), 3938 !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 3939 [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2)))], 3940 _.ExeDomain>, EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>; 3941 def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst), 3942 (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2), 3943 !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|", 3944 "$dst {${mask}} {z}, $src1, $src2}"), 3945 [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask, 3946 (_.VT (OpNode _.RC:$src1, _.RC:$src2)), 3947 _.ImmAllZerosV)))], 3948 _.ExeDomain>, EVEX_4V, EVEX_KZ, Sched<[SchedWriteFShuffle.XMM]>; 3949 let Constraints = "$src0 = $dst" in 3950 def rrk : AVX512PI<0x10, MRMSrcReg, (outs 
_.RC:$dst), 3951 (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2), 3952 !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|", 3953 "$dst {${mask}}, $src1, $src2}"), 3954 [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask, 3955 (_.VT (OpNode _.RC:$src1, _.RC:$src2)), 3956 (_.VT _.RC:$src0))))], 3957 _.ExeDomain>, EVEX_4V, EVEX_K, Sched<[SchedWriteFShuffle.XMM]>; 3958 let canFoldAsLoad = 1, isReMaterializable = 1 in { 3959 def rm : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst), (ins _.ScalarMemOp:$src), 3960 !strconcat(asm, "\t{$src, $dst|$dst, $src}"), 3961 [(set _.RC:$dst, (_.VT (vzload_frag addr:$src)))], 3962 _.ExeDomain>, EVEX, Sched<[WriteFLoad]>; 3963 // _alt version uses FR32/FR64 register class. 3964 let isCodeGenOnly = 1 in 3965 def rm_alt : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src), 3966 !strconcat(asm, "\t{$src, $dst|$dst, $src}"), 3967 [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))], 3968 _.ExeDomain>, EVEX, Sched<[WriteFLoad]>; 3969 } 3970 let mayLoad = 1, hasSideEffects = 0 in { 3971 let Constraints = "$src0 = $dst" in 3972 def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst), 3973 (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src), 3974 !strconcat(asm, "\t{$src, $dst {${mask}}|", 3975 "$dst {${mask}}, $src}"), 3976 [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFLoad]>; 3977 def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst), 3978 (ins _.KRCWM:$mask, _.ScalarMemOp:$src), 3979 !strconcat(asm, "\t{$src, $dst {${mask}} {z}|", 3980 "$dst {${mask}} {z}, $src}"), 3981 [], _.ExeDomain>, EVEX, EVEX_KZ, Sched<[WriteFLoad]>; 3982 } 3983 def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src), 3984 !strconcat(asm, "\t{$src, $dst|$dst, $src}"), 3985 [(store _.FRC:$src, addr:$dst)], _.ExeDomain>, 3986 EVEX, Sched<[WriteFStore]>; 3987 let mayStore = 1, hasSideEffects = 0 in 3988 def mrk: AVX512PI<0x11, MRMDestMem, (outs), 3989 (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.RC:$src), 3990 !strconcat(asm, "\t{$src, $dst 
{${mask}}|$dst {${mask}}, $src}"), 3991 [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFStore]>, 3992 NotMemoryFoldable; 3993} 3994 3995defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, X86vzload32, f32x_info>, 3996 VEX_LIG, XS, EVEX_CD8<32, CD8VT1>; 3997 3998defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, X86vzload64, f64x_info>, 3999 VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>; 4000 4001 4002multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode, 4003 PatLeaf ZeroFP, X86VectorVTInfo _> { 4004 4005def : Pat<(_.VT (OpNode _.RC:$src0, 4006 (_.VT (scalar_to_vector 4007 (_.EltVT (X86selects VK1WM:$mask, 4008 (_.EltVT _.FRC:$src1), 4009 (_.EltVT _.FRC:$src2))))))), 4010 (!cast<Instruction>(InstrStr#rrk) 4011 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, _.RC)), 4012 VK1WM:$mask, 4013 (_.VT _.RC:$src0), 4014 (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>; 4015 4016def : Pat<(_.VT (OpNode _.RC:$src0, 4017 (_.VT (scalar_to_vector 4018 (_.EltVT (X86selects VK1WM:$mask, 4019 (_.EltVT _.FRC:$src1), 4020 (_.EltVT ZeroFP))))))), 4021 (!cast<Instruction>(InstrStr#rrkz) 4022 VK1WM:$mask, 4023 (_.VT _.RC:$src0), 4024 (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>; 4025} 4026 4027multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _, 4028 dag Mask, RegisterClass MaskRC> { 4029 4030def : Pat<(masked_store 4031 (_.info512.VT (insert_subvector undef, 4032 (_.info128.VT _.info128.RC:$src), 4033 (iPTR 0))), addr:$dst, Mask), 4034 (!cast<Instruction>(InstrStr#mrk) addr:$dst, 4035 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM), 4036 _.info128.RC:$src)>; 4037 4038} 4039 4040multiclass avx512_store_scalar_lowering_subreg<string InstrStr, 4041 AVX512VLVectorVTInfo _, 4042 dag Mask, RegisterClass MaskRC, 4043 SubRegIndex subreg> { 4044 4045def : Pat<(masked_store 4046 (_.info512.VT (insert_subvector undef, 4047 (_.info128.VT _.info128.RC:$src), 4048 (iPTR 0))), addr:$dst, Mask), 4049 (!cast<Instruction>(InstrStr#mrk) addr:$dst, 4050 (COPY_TO_REGCLASS (i32 
(INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), 4051 _.info128.RC:$src)>; 4052 4053} 4054 4055// This matches the more recent codegen from clang that avoids emitting a 512 4056// bit masked store directly. Codegen will widen 128-bit masked store to 512 4057// bits on AVX512F only targets. 4058multiclass avx512_store_scalar_lowering_subreg2<string InstrStr, 4059 AVX512VLVectorVTInfo _, 4060 dag Mask512, dag Mask128, 4061 RegisterClass MaskRC, 4062 SubRegIndex subreg> { 4063 4064// AVX512F pattern. 4065def : Pat<(masked_store 4066 (_.info512.VT (insert_subvector undef, 4067 (_.info128.VT _.info128.RC:$src), 4068 (iPTR 0))), addr:$dst, Mask512), 4069 (!cast<Instruction>(InstrStr#mrk) addr:$dst, 4070 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), 4071 _.info128.RC:$src)>; 4072 4073// AVX512VL pattern. 4074def : Pat<(masked_store (_.info128.VT _.info128.RC:$src), addr:$dst, Mask128), 4075 (!cast<Instruction>(InstrStr#mrk) addr:$dst, 4076 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), 4077 _.info128.RC:$src)>; 4078} 4079 4080multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _, 4081 dag Mask, RegisterClass MaskRC> { 4082 4083def : Pat<(_.info128.VT (extract_subvector 4084 (_.info512.VT (masked_load addr:$srcAddr, Mask, 4085 _.info512.ImmAllZerosV)), 4086 (iPTR 0))), 4087 (!cast<Instruction>(InstrStr#rmkz) 4088 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM), 4089 addr:$srcAddr)>; 4090 4091def : Pat<(_.info128.VT (extract_subvector 4092 (_.info512.VT (masked_load addr:$srcAddr, Mask, 4093 (_.info512.VT (insert_subvector undef, 4094 (_.info128.VT (X86vzmovl _.info128.RC:$src)), 4095 (iPTR 0))))), 4096 (iPTR 0))), 4097 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src, 4098 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM), 4099 addr:$srcAddr)>; 4100 4101} 4102 4103multiclass avx512_load_scalar_lowering_subreg<string InstrStr, 4104 AVX512VLVectorVTInfo _, 4105 dag Mask, 
RegisterClass MaskRC, 4106 SubRegIndex subreg> { 4107 4108def : Pat<(_.info128.VT (extract_subvector 4109 (_.info512.VT (masked_load addr:$srcAddr, Mask, 4110 _.info512.ImmAllZerosV)), 4111 (iPTR 0))), 4112 (!cast<Instruction>(InstrStr#rmkz) 4113 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), 4114 addr:$srcAddr)>; 4115 4116def : Pat<(_.info128.VT (extract_subvector 4117 (_.info512.VT (masked_load addr:$srcAddr, Mask, 4118 (_.info512.VT (insert_subvector undef, 4119 (_.info128.VT (X86vzmovl _.info128.RC:$src)), 4120 (iPTR 0))))), 4121 (iPTR 0))), 4122 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src, 4123 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), 4124 addr:$srcAddr)>; 4125 4126} 4127 4128// This matches the more recent codegen from clang that avoids emitting a 512 4129// bit masked load directly. Codegen will widen 128-bit masked load to 512 4130// bits on AVX512F only targets. 4131multiclass avx512_load_scalar_lowering_subreg2<string InstrStr, 4132 AVX512VLVectorVTInfo _, 4133 dag Mask512, dag Mask128, 4134 RegisterClass MaskRC, 4135 SubRegIndex subreg> { 4136// AVX512F patterns. 4137def : Pat<(_.info128.VT (extract_subvector 4138 (_.info512.VT (masked_load addr:$srcAddr, Mask512, 4139 _.info512.ImmAllZerosV)), 4140 (iPTR 0))), 4141 (!cast<Instruction>(InstrStr#rmkz) 4142 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), 4143 addr:$srcAddr)>; 4144 4145def : Pat<(_.info128.VT (extract_subvector 4146 (_.info512.VT (masked_load addr:$srcAddr, Mask512, 4147 (_.info512.VT (insert_subvector undef, 4148 (_.info128.VT (X86vzmovl _.info128.RC:$src)), 4149 (iPTR 0))))), 4150 (iPTR 0))), 4151 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src, 4152 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), 4153 addr:$srcAddr)>; 4154 4155// AVX512Vl patterns. 
// AVX512VL forms: on VL targets the 128-bit masked load is selected
// directly, without widening to 512 bits. Zero-masking variant:
def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
                         _.info128.ImmAllZerosV)),
          (!cast<Instruction>(InstrStr#rmkz)
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

// Merge-masking variant: the pass-through element is the zero-extended
// scalar already held in $src.
def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
                         (_.info128.VT (X86vzmovl _.info128.RC:$src)))),
          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;
}

// Instantiate the masked scalar select lowerings for VMOVSS/VMOVSD.
defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;

// Masked scalar stores: the mask arrives as a GPR with only bit 0
// meaningful (note the "and …, 1" in each pattern); it is moved into a
// VK1WM mask register via COPY_TO_REGCLASS (and INSERT_SUBREG for the
// sub-i32 GPR classes).
defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;

defm : avx512_store_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (insert_subvector
                           (v16i1 immAllZerosV),
                           (v4i1 (extract_subvector
                                  (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                                  (iPTR 0))),
                           (iPTR 0))),
                   (v4i1 (extract_subvector
                          (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                          (iPTR 0))), GR8, sub_8bit>;
defm : avx512_store_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1
                    (extract_subvector
                     (v16i1
                      (insert_subvector
                       (v16i1 immAllZerosV),
                       (v2i1 (extract_subvector
                              (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                              (iPTR 0))),
                       (iPTR 0))),
                     (iPTR 0))),
                   (v2i1 (extract_subvector
                          (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                          (iPTR 0))), GR8, sub_8bit>;

// Masked scalar loads, mirroring the store lowerings above.
defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;

defm : avx512_load_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (insert_subvector
                           (v16i1 immAllZerosV),
                           (v4i1 (extract_subvector
                                  (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                                  (iPTR 0))),
                           (iPTR 0))),
                   (v4i1 (extract_subvector
                          (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                          (iPTR 0))), GR8, sub_8bit>;
defm : avx512_load_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1
                    (extract_subvector
                     (v16i1
                      (insert_subvector
                       (v16i1 immAllZerosV),
                       (v2i1 (extract_subvector
                              (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                              (iPTR 0))),
                       (iPTR 0))),
                     (iPTR 0))),
                   (v2i1 (extract_subvector
                          (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                          (iPTR 0))), GR8, sub_8bit>;

// Scalar f32/f64 masked selects, lowered onto the masked VMOVSS/VMOVSD
// register forms. The FR32X/FR64X operands are shuttled through VR128X
// with COPY_TO_REGCLASS because the instructions operate on XMM registers.
def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrk
           (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
           VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
           (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;

def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), fp32imm0)),
          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrkz VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
           (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;

def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), (f32 FR32X:$src0))),
          (COPY_TO_REGCLASS
           (v4f32 (VMOVSSZrmk (v4f32 (COPY_TO_REGCLASS FR32X:$src0, VR128X)),
                              VK1WM:$mask, addr:$src)),
           FR32X)>;
def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), fp32imm0)),
          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrmkz VK1WM:$mask, addr:$src)), FR32X)>;

def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrk
           (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
           VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
           (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;

def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fp64imm0)),
          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrkz VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
           (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;

def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), (f64 FR64X:$src0))),
          (COPY_TO_REGCLASS
           (v2f64 (VMOVSDZrmk (v2f64 (COPY_TO_REGCLASS FR64X:$src0, VR128X)),
                              VK1WM:$mask, addr:$src)),
           FR64X)>;
def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), fp64imm0)),
          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrmkz VK1WM:$mask, addr:$src)), FR64X)>;


// Vector-typed selects that only differ in element 0: reuse $src1 as both
// the merge source and the move source.
def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 VR128X:$src2))),
          (VMOVSSZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 VR128X:$src2))),
          (VMOVSDZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;

def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 immAllZerosV))),
          (VMOVSSZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 immAllZerosV))),
          (VMOVSDZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;

// Reversed-operand (MRMDestReg, opcode 0x11) encodings of the register
// moves. They carry no patterns: isCodeGenOnly + ForceDisassemble means
// they exist for the disassembler and for FoldGenData-driven unfolding.
let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
  def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                            (ins VR128X:$src1, VR128X:$src2),
                            "vmovss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                            []>, XS, EVEX_4V, VEX_LIG,
                            FoldGenData<"VMOVSSZrr">,
                            Sched<[SchedWriteFShuffle.XMM]>;

  let Constraints = "$src0 = $dst" in
  def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                             (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask,
                                  VR128X:$src1, VR128X:$src2),
                             "vmovss\t{$src2, $src1, $dst {${mask}}|"#
                                      "$dst {${mask}}, $src1, $src2}",
                             []>, EVEX_K, XS, EVEX_4V, VEX_LIG,
                             FoldGenData<"VMOVSSZrrk">,
                             Sched<[SchedWriteFShuffle.XMM]>;

  def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                              (ins f32x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
                              "vmovss\t{$src2, $src1, $dst {${mask}} {z}|"#
                                       "$dst {${mask}} {z}, $src1, $src2}",
                              []>, EVEX_KZ, XS, EVEX_4V, VEX_LIG,
                              FoldGenData<"VMOVSSZrrkz">,
                              Sched<[SchedWriteFShuffle.XMM]>;

  def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                            (ins VR128X:$src1, VR128X:$src2),
                            "vmovsd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                            []>, XD, EVEX_4V, VEX_LIG, VEX_W,
                            FoldGenData<"VMOVSDZrr">,
                            Sched<[SchedWriteFShuffle.XMM]>;

  let Constraints = "$src0 = $dst" in
  def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                             (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask,
                                  VR128X:$src1, VR128X:$src2),
                             "vmovsd\t{$src2, $src1, $dst {${mask}}|"#
                                      "$dst {${mask}}, $src1, $src2}",
                             []>, EVEX_K, XD, EVEX_4V, VEX_LIG,
                             VEX_W, FoldGenData<"VMOVSDZrrk">,
                             Sched<[SchedWriteFShuffle.XMM]>;

  def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                              (ins f64x_info.KRCWM:$mask, VR128X:$src1,
                                   VR128X:$src2),
                              "vmovsd\t{$src2, $src1, $dst {${mask}} {z}|"#
                                       "$dst {${mask}} {z}, $src1, $src2}",
                              []>, EVEX_KZ, XD, EVEX_4V, VEX_LIG,
                              VEX_W, FoldGenData<"VMOVSDZrrkz">,
                              Sched<[SchedWriteFShuffle.XMM]>;
}

// ".s" assembler aliases that force the reversed (store-form) encodings.
def : InstAlias<"vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VMOVSSZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}}|"#
                "$dst {${mask}}, $src1, $src2}",
                (VMOVSSZrrk_REV VR128X:$dst, VK1WM:$mask,
                                VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"#
                "$dst {${mask}} {z}, $src1, $src2}",
                (VMOVSSZrrkz_REV VR128X:$dst, VK1WM:$mask,
                                 VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VMOVSDZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}}|"#
                "$dst {${mask}}, $src1, $src2}",
                (VMOVSDZrrk_REV VR128X:$dst, VK1WM:$mask,
                                VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"#
                "$dst {${mask}} {z}, $src1, $src2}",
                (VMOVSDZrrkz_REV VR128X:$dst, VK1WM:$mask,
                                 VR128X:$src1, VR128X:$src2), 0>;

let Predicates = [HasAVX512, OptForSize] in {
  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
            (VMOVSSZrr (v4f32 (AVX512_128_SET0)), VR128X:$src)>;
  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
            (VMOVSSZrr (v4i32 (AVX512_128_SET0)), VR128X:$src)>;

  // Move low f32 and clear high bits.
  // 256/512-bit vzmovl: do the "move low element against zero" on the low
  // 128-bit lane with VMOVSS, then re-widen with SUBREG_TO_REG (i32 0),
  // which asserts the producing instruction zeroed the upper lanes.
  def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
              (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))), sub_xmm)>;
  def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
              (v4i32 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)))), sub_xmm)>;

  def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
              (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))), sub_xmm)>;
  def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
              (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)))), sub_xmm)>;
}

// Use 128-bit blends for OptForSpeed since BLENDs have better throughput than
// VMOVSS/SD. Unfortunately, loses the ability to use XMM16-31.
let Predicates = [HasAVX512, OptForSpeed] in {
  def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VBLENDPSrri (v4f32 (V_SET0)),
                    (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)),
                    (i8 1))), sub_xmm)>;
  def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VPBLENDWrri (v4i32 (V_SET0)),
                    (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)),
                    (i8 3))), sub_xmm)>;
}

let Predicates = [HasAVX512] in {
  // Scalar loads widened to a vector select the load-form VMOVSS/VMOVSD.
  def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
            (VMOVSSZrm addr:$src)>;
  def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
            (VMOVSDZrm addr:$src)>;

  // Represent the same patterns above but in the form they appear for
  // 256-bit types
  def : Pat<(v8f32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v4f64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;

  // Represent the same patterns above but in the form they appear for
  // 512-bit types
  def : Pat<(v16f32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v8f64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
}

// Move low quadword and zero the upper one (vmovq xmm, xmm).
let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
                                  (ins VR128X:$src),
                                  "vmovq\t{$src, $dst|$dst, $src}",
                                  [(set VR128X:$dst, (v2i64 (X86vzmovl
                                                     (v2i64 VR128X:$src))))]>,
                                  EVEX, VEX_W;
}

let Predicates = [HasAVX512] in {
  // GPR -> XMM element 0 with implicit zeroing of the upper elements.
  def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
            (VMOVDI2PDIZrr GR32:$src)>;

  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
            (VMOV64toPQIZrr GR64:$src)>;

  // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part.
  def : Pat<(v4i32 (X86vzload32 addr:$src)),
            (VMOVDI2PDIZrm addr:$src)>;
  def : Pat<(v8i32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
  def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
            (VMOVZPQILo2PQIZrr VR128X:$src)>;
  def : Pat<(v2i64 (X86vzload64 addr:$src)),
            (VMOVQI2PQIZrm addr:$src)>;
  def : Pat<(v4i64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;

  // Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
4448 def : Pat<(v16i32 (X86vzload32 addr:$src)), 4449 (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>; 4450 def : Pat<(v8i64 (X86vzload64 addr:$src)), 4451 (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>; 4452 4453 def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))), 4454 (SUBREG_TO_REG (i32 0), 4455 (v2f64 (VMOVZPQILo2PQIZrr 4456 (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))), 4457 sub_xmm)>; 4458 def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))), 4459 (SUBREG_TO_REG (i32 0), 4460 (v2i64 (VMOVZPQILo2PQIZrr 4461 (v2i64 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)))), 4462 sub_xmm)>; 4463 4464 def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))), 4465 (SUBREG_TO_REG (i32 0), 4466 (v2f64 (VMOVZPQILo2PQIZrr 4467 (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))), 4468 sub_xmm)>; 4469 def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))), 4470 (SUBREG_TO_REG (i32 0), 4471 (v2i64 (VMOVZPQILo2PQIZrr 4472 (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)))), 4473 sub_xmm)>; 4474} 4475 4476//===----------------------------------------------------------------------===// 4477// AVX-512 - Non-temporals 4478//===----------------------------------------------------------------------===// 4479 4480def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst), 4481 (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}", 4482 [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.ZMM.RM]>, 4483 EVEX, T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>; 4484 4485let Predicates = [HasVLX] in { 4486 def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst), 4487 (ins i256mem:$src), 4488 "vmovntdqa\t{$src, $dst|$dst, $src}", 4489 [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.YMM.RM]>, 4490 EVEX, T8PD, EVEX_V256, EVEX_CD8<64, CD8VF>; 4491 4492 def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst), 4493 (ins i128mem:$src), 4494 "vmovntdqa\t{$src, $dst|$dst, $src}", 4495 [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.XMM.RM]>, 4496 EVEX, 
T8PD, EVEX_V128, EVEX_CD8<64, CD8VF>; 4497} 4498 4499multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, 4500 X86SchedWriteMoveLS Sched, 4501 PatFrag st_frag = alignednontemporalstore> { 4502 let SchedRW = [Sched.MR], AddedComplexity = 400 in 4503 def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src), 4504 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 4505 [(st_frag (_.VT _.RC:$src), addr:$dst)], 4506 _.ExeDomain>, EVEX, EVEX_CD8<_.EltSize, CD8VF>; 4507} 4508 4509multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr, 4510 AVX512VLVectorVTInfo VTInfo, 4511 X86SchedWriteMoveLSWidths Sched> { 4512 let Predicates = [HasAVX512] in 4513 defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512, Sched.ZMM>, EVEX_V512; 4514 4515 let Predicates = [HasAVX512, HasVLX] in { 4516 defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256, Sched.YMM>, EVEX_V256; 4517 defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128, Sched.XMM>, EVEX_V128; 4518 } 4519} 4520 4521defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info, 4522 SchedWriteVecMoveLSNT>, PD; 4523defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info, 4524 SchedWriteFMoveLSNT>, PD, VEX_W; 4525defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info, 4526 SchedWriteFMoveLSNT>, PS; 4527 4528let Predicates = [HasAVX512], AddedComplexity = 400 in { 4529 def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst), 4530 (VMOVNTDQZmr addr:$dst, VR512:$src)>; 4531 def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst), 4532 (VMOVNTDQZmr addr:$dst, VR512:$src)>; 4533 def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst), 4534 (VMOVNTDQZmr addr:$dst, VR512:$src)>; 4535 4536 def : Pat<(v8f64 (alignednontemporalload addr:$src)), 4537 (VMOVNTDQAZrm addr:$src)>; 4538 def : Pat<(v16f32 (alignednontemporalload addr:$src)), 4539 (VMOVNTDQAZrm addr:$src)>; 4540 def : Pat<(v8i64 (alignednontemporalload addr:$src)), 4541 
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v16i32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v32i16 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v64i8 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
}

// 256/128-bit nontemporal store/load patterns; VMOVNTDQ/VMOVNTDQA cover every
// element type, so all integer and FP vector types map onto the same opcode.
let Predicates = [HasVLX], AddedComplexity = 400 in {
  def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignednontemporalstore (v16i16 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignednontemporalstore (v32i8 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;

  def : Pat<(v4f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v8f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v4i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v8i32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v16i16 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v32i8 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;

  def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignednontemporalstore (v8i16 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignednontemporalstore (v16i8 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;

  def : Pat<(v2f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v4f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v2i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v4i32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v8i16 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v16i8 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - Integer arithmetic
//

// Masked integer binop for a single vector width: register-register (rr) and
// register-memory (rm) forms.
multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86VectorVTInfo _, X86FoldableSchedWrite sched,
                           bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                    "$src2, $src1", "$src1, $src2",
                    (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                    IsCommutable, IsCommutable>, AVX512BIBase, EVEX_4V,
                    Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2)))>,
                  AVX512BIBase, EVEX_4V,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// As avx512_binop_rm, plus the embedded-broadcast (rmb, EVEX.b) memory form.
multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
                            bit IsCommutable = 0> :
           avx512_binop_rm<opc, OpcodeStr, OpNode, _, sched, IsCommutable> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                  "${src2}"#_.BroadcastStr#", $src1",
                  "$src1, ${src2}"#_.BroadcastStr,
                  (_.VT (OpNode _.RC:$src1,
                                (_.BroadcastLdFrag addr:$src2)))>,
                  AVX512BIBase, EVEX_4V, EVEX_B,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// Instantiate the 512-bit form under 'prd' and the 128/256-bit forms when VLX
// is also available.
multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              AVX512VLVectorVTInfo VTInfo,
                              X86SchedWriteWidths sched, Predicate prd,
                              bit IsCommutable = 0> {
  let Predicates = [prd] in
    defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
                             IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256,
                                sched.YMM, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128,
                                sched.XMM, IsCommutable>, EVEX_V128;
  }
}

// Same width fan-out as avx512_binop_rm_vl, but with the broadcast form.
multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               AVX512VLVectorVTInfo VTInfo,
                               X86SchedWriteWidths sched, Predicate prd,
                               bit IsCommutable = 0> {
  let Predicates = [prd] in
    defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
                              IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256,
                                 sched.YMM, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128,
                                 sched.XMM, IsCommutable>, EVEX_V128;
  }
}

// i64-element flavor (broadcast supported, REX.W set).
multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
                                  sched, prd, IsCommutable>,
                                  VEX_W, EVEX_CD8<64, CD8VF>;
}

// i32-element flavor (broadcast supported).
multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
                                  sched, prd, IsCommutable>, EVEX_CD8<32, CD8VF>;
}

// i16-element flavor (no broadcast form for word/byte ops).
multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info,
                                 sched, prd, IsCommutable>, EVEX_CD8<16, CD8VF>,
                                 VEX_WIG;
}

// i8-element flavor (no broadcast form for word/byte ops).
multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info,
                                 sched, prd, IsCommutable>, EVEX_CD8<8, CD8VF>,
                                 VEX_WIG;
}

// Dword + qword pair with distinct opcodes and "d"/"q" mnemonic suffixes.
multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd, bit IsCommutable = 0> {
  defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, sched, prd,
                                IsCommutable>;

  defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, sched, prd,
                                IsCommutable>;
}

// Byte + word pair with distinct opcodes and "b"/"w" mnemonic suffixes.
multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd, bit IsCommutable = 0> {
  defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, sched, prd,
                                IsCommutable>;

  defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode, sched, prd,
                                IsCommutable>;
}

// All four element widths. Note d/q require only AVX512F while b/w require
// BWI.
multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
                                  bits<8> opc_d, bits<8> opc_q,
                                  string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched,
                                  bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
                                    sched, HasAVX512, IsCommutable>,
              avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
                                    sched, HasBWI, IsCommutable>;
}

// Binop whose source and destination vector types differ; the broadcast form
// uses a third VT info (_Brdct) for the memory element type.
multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched,
                            SDNode OpNode,X86VectorVTInfo _Src,
                            X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct,
                            bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
                            "$src2, $src1","$src1, $src2",
                            (_Dst.VT (OpNode
                                         (_Src.VT _Src.RC:$src1),
                                         (_Src.VT _Src.RC:$src2))),
                            IsCommutable>,
                            AVX512BIBase, EVEX_4V, Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                        (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
                        "$src2, $src1", "$src1, $src2",
                        (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
                                         (_Src.LdFrag addr:$src2)))>,
                        AVX512BIBase, EVEX_4V,
                        Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                    (ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
                    OpcodeStr,
                    "${src2}"#_Brdct.BroadcastStr#", $src1",
                    "$src1, ${src2}"#_Brdct.BroadcastStr,
                    (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
                                 (_Brdct.VT (_Brdct.BroadcastLdFrag addr:$src2)))))>,
                    AVX512BIBase, EVEX_4V, EVEX_B,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
}

defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
                                    SchedWriteVecALU, 1>;
defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
                                    SchedWriteVecALU, 0>;
defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", saddsat,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", ssubsat,
                                    SchedWriteVecALU, HasBWI, 0>;
defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", uaddsat,
                                     SchedWriteVecALU, HasBWI, 1>;
defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", usubsat,
                                     SchedWriteVecALU, HasBWI, 0>;
defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
                                    SchedWritePMULLD, HasAVX512, 1>, T8PD;
defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
                                    SchedWriteVecIMul, HasBWI, 1>;
defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
                                    SchedWriteVecIMul, HasDQI, 1>, T8PD,
                                    NotEVEX2VEXConvertible;
defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SchedWriteVecIMul,
                                    HasBWI, 1>;
defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SchedWriteVecIMul,
                                     HasBWI, 1>;
defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs,
                                      SchedWriteVecIMul, HasBWI, 1>, T8PD;
defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", X86avg,
                                   SchedWriteVecALU, HasBWI, 1>;
defm VPMULDQ : avx512_binop_rm_vl_q<0x28, "vpmuldq", X86pmuldq,
                                    SchedWriteVecIMul, HasAVX512, 1>, T8PD;
defm VPMULUDQ : avx512_binop_rm_vl_q<0xF4, "vpmuludq", X86pmuludq,
                                     SchedWriteVecIMul, HasAVX512, 1>;

// Width fan-out for mixed source/dest type binops (broadcast element is
// always i64 here).
multiclass avx512_binop_all<bits<8> opc, string OpcodeStr,
                            X86SchedWriteWidths sched,
                            AVX512VLVectorVTInfo _SrcVTInfo,
                            AVX512VLVectorVTInfo _DstVTInfo,
                            SDNode OpNode, Predicate prd, bit IsCommutable = 0> {
  let Predicates = [prd] in
    defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
                                   _SrcVTInfo.info512, _DstVTInfo.info512,
                                   v8i64_info, IsCommutable>,
                                   EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
  let Predicates = [HasVLX, prd] in {
    defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
                                      _SrcVTInfo.info256, _DstVTInfo.info256,
                                      v4i64x_info, IsCommutable>,
                                      EVEX_V256, EVEX_CD8<64, CD8VF>, VEX_W;
    defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
                                      _SrcVTInfo.info128, _DstVTInfo.info128,
                                      v2i64x_info, IsCommutable>,
                                      EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_W;
  }
}

defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SchedWriteVecALU,
                                       avx512vl_i8_info, avx512vl_i8_info,
                                       X86multishift, HasVBMI, 0>, T8PD;

// Broadcast memory form for pack-style ops (source element broadcast, result
// type narrower than the source).
multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86VectorVTInfo _Src, X86VectorVTInfo _Dst,
                            X86FoldableSchedWrite sched> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
                            OpcodeStr,
                            "${src2}"#_Src.BroadcastStr#", $src1",
                            "$src1, ${src2}"#_Src.BroadcastStr,
                            (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
                                        (_Src.VT (_Src.BroadcastLdFrag addr:$src2)))))>,
                            EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>,
                            Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// rr/rm forms for pack-style ops whose source and destination types differ.
multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
                            SDNode OpNode,X86VectorVTInfo _Src,
                            X86VectorVTInfo _Dst, X86FoldableSchedWrite sched,
                            bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
                            "$src2, $src1","$src1, $src2",
                            (_Dst.VT (OpNode
                                         (_Src.VT _Src.RC:$src1),
                                         (_Src.VT _Src.RC:$src2))),
                            IsCommutable, IsCommutable>,
                            EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V, Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                        (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
                        "$src2, $src1", "$src1, $src2",
                        (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
                                         (_Src.LdFrag addr:$src2)))>,
                         EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>,
                         Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// PACKSSDW/PACKUSDW-style (i32 -> i16) fan-out; dword sources allow the
// broadcast form.
multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode> {
  let Predicates = [HasBWI] in
  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i32_info,
                                v32i16_info, SchedWriteShuffle.ZMM>,
                avx512_packs_rmb<opc, OpcodeStr, OpNode, v16i32_info,
                                 v32i16_info, SchedWriteShuffle.ZMM>, EVEX_V512;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i32x_info,
                                     v16i16x_info, SchedWriteShuffle.YMM>,
                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v8i32x_info,
                                      v16i16x_info, SchedWriteShuffle.YMM>,
                                      EVEX_V256;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v4i32x_info,
                                     v8i16x_info, SchedWriteShuffle.XMM>,
                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v4i32x_info,
                                      v8i16x_info, SchedWriteShuffle.XMM>,
                                      EVEX_V128;
  }
}

// PACKSSWB/PACKUSWB-style (i16 -> i8) fan-out; no broadcast form for word
// sources.
multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
                                   SDNode OpNode> {
  let Predicates = [HasBWI] in
  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info, v64i8_info,
                                SchedWriteShuffle.ZMM>, EVEX_V512, VEX_WIG;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info,
                                     v32i8x_info, SchedWriteShuffle.YMM>,
                                     EVEX_V256, VEX_WIG;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info,
                                     v16i8x_info, SchedWriteShuffle.XMM>,
                                     EVEX_V128, VEX_WIG;
  }
}

// PMADDUBSW/PMADDWD-style multiply-add (narrower source, wider dest).
multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr,
                            SDNode OpNode, AVX512VLVectorVTInfo _Src,
                            AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> {
  let Predicates = [HasBWI] in
  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512,
                                _Dst.info512, SchedWriteVecIMul.ZMM,
                                IsCommutable>, EVEX_V512;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256,
                                     _Dst.info256, SchedWriteVecIMul.YMM,
                                     IsCommutable>, EVEX_V256;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128,
                                     _Dst.info128, SchedWriteVecIMul.XMM,
                                     IsCommutable>, EVEX_V128;
  }
}

defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, AVX512BIBase;
defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, AVX5128IBase;
defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase;
defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase;

defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
                     avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8PD, VEX_WIG;
defm VPMADDWD   : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
                     avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase, VEX_WIG;

defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax,
                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMAXSD : avx512_binop_rm_vl_d<0x3D, "vpmaxsd", smax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
defm VPMAXSQ : avx512_binop_rm_vl_q<0x3D, "vpmaxsq", smax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
                                    NotEVEX2VEXConvertible;

defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax,
                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMAXUD : avx512_binop_rm_vl_d<0x3F, "vpmaxud", umax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
defm VPMAXUQ : avx512_binop_rm_vl_q<0x3F, "vpmaxuq", umax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
                                    NotEVEX2VEXConvertible;

defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin,
                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMINSD : avx512_binop_rm_vl_d<0x39, "vpminsd", smin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
defm VPMINSQ : avx512_binop_rm_vl_q<0x39, "vpminsq", smin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
                                    NotEVEX2VEXConvertible;

defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin,
                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMINUD : avx512_binop_rm_vl_d<0x3B, "vpminud", umin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
defm VPMINUQ : avx512_binop_rm_vl_q<0x3B, "vpminuq", umin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
                                    NotEVEX2VEXConvertible;

// PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX.
// Widen 128/256-bit VPMULLQ operands into a ZMM register, run the 512-bit
// instruction, and extract the low subregister again.
let Predicates = [HasDQI, NoVLX] in {
  def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG
                (VPMULLQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
                sub_ymm)>;
  def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
            (EXTRACT_SUBREG
                (VPMULLQZrmb
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    addr:$src2),
                sub_ymm)>;

  def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG
                (VPMULLQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
                sub_xmm)>;
  def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
            (EXTRACT_SUBREG
                (VPMULLQZrmb
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    addr:$src2),
                sub_xmm)>;
}

// Same widening trick for the v2i64/v4i64 min/max ops, which only exist as
// 512-bit instructions without VLX.
multiclass avx512_min_max_lowering<string Instr, SDNode OpNode> {
  def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)),
            (EXTRACT_SUBREG
                (!cast<Instruction>(Instr#"rr")
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
                sub_ymm)>;
  def : Pat<(v4i64 (OpNode (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
            (EXTRACT_SUBREG
                (!cast<Instruction>(Instr#"rmb")
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    addr:$src2),
                sub_ymm)>;

  def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)),
            (EXTRACT_SUBREG
                (!cast<Instruction>(Instr#"rr")
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
                sub_xmm)>;
  def : Pat<(v2i64 (OpNode (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
            (EXTRACT_SUBREG
                (!cast<Instruction>(Instr#"rmb")
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    addr:$src2),
                sub_xmm)>;
}

let Predicates = [HasAVX512, NoVLX] in {
  defm : avx512_min_max_lowering<"VPMAXUQZ", umax>;
  defm : avx512_min_max_lowering<"VPMINUQZ", umin>;
  defm : avx512_min_max_lowering<"VPMAXSQZ", smax>;
  defm : avx512_min_max_lowering<"VPMINSQZ", smin>;
}

//===----------------------------------------------------------------------===//
// AVX-512 Logical Instructions
//===----------------------------------------------------------------------===//

defm VPAND : avx512_binop_rm_vl_dq<0xDB, 0xDB, "vpand", and,
                                   SchedWriteVecLogic, HasAVX512, 1>;
defm VPOR : avx512_binop_rm_vl_dq<0xEB, 0xEB, "vpor", or,
                                  SchedWriteVecLogic, HasAVX512, 1>;
defm VPXOR : avx512_binop_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
                                   SchedWriteVecLogic, HasAVX512, 1>;
defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
                                    SchedWriteVecLogic, HasAVX512>;

// Logical ops only have d/q instruction forms; map the i8/i16 element types
// onto the q-sized instructions.
let Predicates = [HasVLX] in {
  def : Pat<(v16i8 (and VR128X:$src1, VR128X:$src2)),
            (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (and VR128X:$src1, VR128X:$src2)),
            (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(v16i8 (or VR128X:$src1, VR128X:$src2)),
            (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (or VR128X:$src1, VR128X:$src2)),
            (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(v16i8 (xor VR128X:$src1, VR128X:$src2)),
            (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (xor VR128X:$src1, VR128X:$src2)),
            (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(v16i8 (X86andnp VR128X:$src1, VR128X:$src2)),
            (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (X86andnp VR128X:$src1, VR128X:$src2)),
            (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(and VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(and VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPANDQZ128rm VR128X:$src1, addr:$src2)>;

  def : Pat<(or VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPORQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(or VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPORQZ128rm VR128X:$src1, addr:$src2)>;

  def : Pat<(xor VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(xor VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPXORQZ128rm VR128X:$src1, addr:$src2)>;

  def : Pat<(X86andnp VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;

  def : Pat<(v32i8 (and VR256X:$src1, VR256X:$src2)),
            (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (and VR256X:$src1, VR256X:$src2)),
            (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;

  def : Pat<(v32i8 (or VR256X:$src1, VR256X:$src2)),
            (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (or VR256X:$src1, VR256X:$src2)),
            (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;

  def : Pat<(v32i8 (xor VR256X:$src1, VR256X:$src2)),
            (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (xor VR256X:$src1, VR256X:$src2)),
            (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;

  def : Pat<(v32i8 (X86andnp VR256X:$src1, VR256X:$src2)),
            (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (X86andnp VR256X:$src1, VR256X:$src2)),
            (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;

  def : Pat<(and VR256X:$src1, (loadv32i8 addr:$src2)),
            (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(and VR256X:$src1, (loadv16i16 addr:$src2)),
            (VPANDQZ256rm VR256X:$src1, addr:$src2)>;

  def : Pat<(or VR256X:$src1, (loadv32i8 addr:$src2)),
            (VPORQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(or VR256X:$src1, (loadv16i16 addr:$src2)),
            (VPORQZ256rm VR256X:$src1, addr:$src2)>;

  def : Pat<(xor VR256X:$src1, (loadv32i8 addr:$src2)),
            (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(xor VR256X:$src1, (loadv16i16 addr:$src2)),
            (VPXORQZ256rm VR256X:$src1, addr:$src2)>;

  def : Pat<(X86andnp VR256X:$src1, (loadv32i8 addr:$src2)),
            (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR256X:$src1, (loadv16i16 addr:$src2)),
            (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
}

// 512-bit i8/i16 logical ops, again selected onto the q-sized instructions.
let Predicates = [HasAVX512] in {
  def : Pat<(v64i8 (and VR512:$src1, VR512:$src2)),
            (VPANDQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v32i16 (and VR512:$src1, VR512:$src2)),
            (VPANDQZrr VR512:$src1, VR512:$src2)>;

  def : Pat<(v64i8 (or VR512:$src1, VR512:$src2)),
            (VPORQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v32i16 (or VR512:$src1, VR512:$src2)),
            (VPORQZrr VR512:$src1, VR512:$src2)>;

  def : Pat<(v64i8 (xor VR512:$src1, VR512:$src2)),
            (VPXORQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v32i16 (xor VR512:$src1, VR512:$src2)),
            (VPXORQZrr VR512:$src1, VR512:$src2)>;

  def : Pat<(v64i8 (X86andnp VR512:$src1, VR512:$src2)),
            (VPANDNQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v32i16 (X86andnp VR512:$src1, VR512:$src2)),
            (VPANDNQZrr VR512:$src1, VR512:$src2)>;

  def : Pat<(and VR512:$src1, (loadv64i8 addr:$src2)),
            (VPANDQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(and VR512:$src1, (loadv32i16 addr:$src2)),
            (VPANDQZrm VR512:$src1, addr:$src2)>;

  def : Pat<(or VR512:$src1, (loadv64i8 addr:$src2)),
            (VPORQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(or VR512:$src1, (loadv32i16 addr:$src2)),
            (VPORQZrm VR512:$src1, addr:$src2)>;

  def : Pat<(xor VR512:$src1, (loadv64i8 addr:$src2)),
            (VPXORQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(xor VR512:$src1, (loadv32i16 addr:$src2)),
            (VPXORQZrm VR512:$src1, addr:$src2)>;

  def : Pat<(X86andnp VR512:$src1, (loadv64i8 addr:$src2)),
            (VPANDNQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR512:$src1, (loadv32i16 addr:$src2)),
            (VPANDNQZrm VR512:$src1, addr:$src2)>;
}

// Patterns to catch vselect with different type than logic op.
multiclass avx512_logical_lowering<string InstrStr, SDNode OpNode,
                                   X86VectorVTInfo _,
                                   X86VectorVTInfo IntInfo> {
  // Masked register-register logical operations.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
             _.RC:$src1, _.RC:$src2)>;

  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
             _.RC:$src2)>;

  // Masked register-memory logical operations.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
                                            (load addr:$src2)))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
             _.RC:$src1, addr:$src2)>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
                                            (load addr:$src2)))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1,
             addr:$src2)>;
}

multiclass avx512_logical_lowering_bcast<string InstrStr, SDNode OpNode,
                                         X86VectorVTInfo _,
                                         X86VectorVTInfo IntInfo> {
  // Register-broadcast logical operations.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert
                    (IntInfo.VT (OpNode _.RC:$src1,
                                 (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
             _.RC:$src1, addr:$src2)>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert
                    (IntInfo.VT (OpNode _.RC:$src1,
                                 (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rmbkz) _.KRCWM:$mask,
             _.RC:$src1, addr:$src2)>;
}

// Instantiate the lowering patterns for all three vector widths.
multiclass avx512_logical_lowering_sizes<string InstrStr, SDNode OpNode,
                                         AVX512VLVectorVTInfo SelectInfo,
                                         AVX512VLVectorVTInfo IntInfo> {
let Predicates = [HasVLX] in {
  defm : avx512_logical_lowering<InstrStr#"Z128", OpNode, SelectInfo.info128,
                                 IntInfo.info128>;
  defm : avx512_logical_lowering<InstrStr#"Z256", OpNode, SelectInfo.info256,
                                 IntInfo.info256>;
}
let Predicates = [HasAVX512] in {
  defm : avx512_logical_lowering<InstrStr#"Z", OpNode, SelectInfo.info512,
                                 IntInfo.info512>;
}
}

multiclass avx512_logical_lowering_sizes_bcast<string InstrStr, SDNode OpNode,
                                               AVX512VLVectorVTInfo SelectInfo,
                                               AVX512VLVectorVTInfo IntInfo> {
let Predicates = [HasVLX] in {
  defm : avx512_logical_lowering_bcast<InstrStr#"Z128", OpNode,
                                       SelectInfo.info128, IntInfo.info128>;
  defm : avx512_logical_lowering_bcast<InstrStr#"Z256", OpNode,
                                       SelectInfo.info256, IntInfo.info256>;
}
let Predicates = [HasAVX512] in {
  defm : avx512_logical_lowering_bcast<InstrStr#"Z", OpNode,
                                       SelectInfo.info512, IntInfo.info512>;
}
}

// Cross-product of vselect element type vs. logic-op element type.
multiclass avx512_logical_lowering_types<string InstrStr, SDNode OpNode> {
  // i64 vselect with i32/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
                                       avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
                                       avx512vl_i8_info>;

  // i32 vselect with i64/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
                                       avx512vl_i64_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
                                       avx512vl_i8_info>;

  // f32 vselect with i64/i32/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i64_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i8_info>;

  // f64 vselect with i64/i32/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i64_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i8_info>;

  defm : avx512_logical_lowering_sizes_bcast<InstrStr#"D", OpNode,
                                             avx512vl_f32_info,
                                             avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes_bcast<InstrStr#"Q", OpNode,
                                             avx512vl_f64_info,
                                             avx512vl_i64_info>;
}

defm : avx512_logical_lowering_types<"VPAND", and>;
defm : avx512_logical_lowering_types<"VPOR", or>;
defm : avx512_logical_lowering_types<"VPXOR", xor>;
defm : avx512_logical_lowering_types<"VPANDN", X86andnp>;
//===----------------------------------------------------------------------===//
// AVX-512  FP arithmetic
//===----------------------------------------------------------------------===//

// Scalar FP binop: maskable intrinsic forms (rr_Int/rm_Int) plus
// isCodeGenOnly FRC forms used for plain scalar selection.
multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                            SDNode OpNode, SDNode VecNode,
                            X86FoldableSchedWrite sched, bit IsCommutable> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
                           Sched<[sched]>;

  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (_.VT (VecNode _.RC:$src1,
                                        (_.ScalarIntMemFrags addr:$src2)))>,
                         Sched<[sched.Folded, sched.ReadAfterFold]>;
  let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.FRC:$src2),
                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                          [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
                          Sched<[sched]> {
    let isCommutable = IsCommutable;
  }
  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                          [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                                         (_.ScalarLdFrag addr:$src2)))]>,
                          Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
  }
}

// Adds the static-rounding (rrb_Int, {rc} operand, EVEX.b) form.
multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                                  SDNode VecNode, X86FoldableSchedWrite sched,
                                  bit IsCommutable = 0> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                          (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
                          "$rc, $src2, $src1", "$src1, $src2, $rc",
                          (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                          (i32 timm:$rc))>,
                          EVEX_B, EVEX_RC, Sched<[sched]>;
}

// Like avx512_fp_scalar but with a suppress-all-exceptions ({sae}) form
// instead of rounding control (used by min/max).
multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                                SDNode OpNode, SDNode VecNode, SDNode SaeNode,
                                X86FoldableSchedWrite sched, bit IsCommutable,
                                string EVEX2VexOvrd> {
  let ExeDomain = _.ExeDomain in {
  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
                           Sched<[sched]>, SIMD_EXC;

  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (_.VT (VecNode _.RC:$src1,
                                        (_.ScalarIntMemFrags addr:$src2)))>,
                         Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;

  let isCodeGenOnly = 1, Predicates = [HasAVX512],
      Uses = [MXCSR], mayRaiseFPException = 1 in {
  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.FRC:$src2),
                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                          [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
                          Sched<[sched]>,
                          EVEX2VEXOverride<EVEX2VexOvrd#"rr"> {
    let isCommutable = IsCommutable;
  }
  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                          [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                                         (_.ScalarLdFrag addr:$src2)))]>,
                          Sched<[sched.Folded, sched.ReadAfterFold]>,
                          EVEX2VEXOverride<EVEX2VexOvrd#"rm">;
  }

  let Uses = [MXCSR] in
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                            "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                            (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
                            EVEX_B, Sched<[sched]>;
  }
}

// ss/sd pair with rounding-control variants (add/mul/sub/div).
multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                SDNode VecNode, SDNode RndNode,
                                X86SchedWriteSizes sched, bit IsCommutable> {
  defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
                              sched.PS.Scl, IsCommutable>,
             avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, RndNode,
                                    sched.PS.Scl, IsCommutable>,
                                    XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
  defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
                              sched.PD.Scl, IsCommutable>,
             avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, RndNode,
                                    sched.PD.Scl, IsCommutable>,
                                    XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
}

// ss/sd pair with SAE variants (min/max).
multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDNode VecNode, SDNode SaeNode,
                              X86SchedWriteSizes sched, bit IsCommutable> {
  defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode,
                                  VecNode, SaeNode, sched.PS.Scl, IsCommutable,
                                  NAME#"SS">,
                                  XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
  defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode,
                                  VecNode, SaeNode, sched.PD.Scl, IsCommutable,
                                  NAME#"SD">,
                                  XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
}
defm VADD : avx512_binop_s_round<0x58, "vadd", any_fadd, X86fadds, X86faddRnds,
                                 SchedWriteFAddSizes, 1>;
defm VMUL : avx512_binop_s_round<0x59, "vmul", any_fmul, X86fmuls, X86fmulRnds,
                                 SchedWriteFMulSizes, 1>;
defm VSUB : avx512_binop_s_round<0x5C, "vsub", any_fsub, X86fsubs, X86fsubRnds,
                                 SchedWriteFAddSizes, 0>;
defm VDIV : avx512_binop_s_round<0x5E, "vdiv", any_fdiv, X86fdivs, X86fdivRnds,
                                 SchedWriteFDivSizes, 0>;
defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminSAEs,
                               SchedWriteFCmpSizes, 0>;
defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxSAEs,
                               SchedWriteFCmpSizes, 0>;

// MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use
// X86fminc and X86fmaxc instead of X86fmin and X86fmax
multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
                                    X86VectorVTInfo _, SDNode OpNode,
                                    X86FoldableSchedWrite sched,
                                    string EVEX2VEXOvrd> {
  let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.FRC:$src2),
                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                          [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
                          Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr"> {
    let isCommutable = 1;
  }
  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                          [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                                         (_.ScalarLdFrag addr:$src2)))]>,
                          Sched<[sched.Folded, sched.ReadAfterFold]>,
                          EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
  }
}
defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
                                         SchedWriteFCmp.Scl, "VMINCSS">, XS,
                                         EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;

defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
                                         SchedWriteFCmp.Scl, "VMINCSD">, XD,
                                         VEX_W, EVEX_4V, VEX_LIG,
                                         EVEX_CD8<64, CD8VT1>, SIMD_EXC;

defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
                                         SchedWriteFCmp.Scl, "VMAXCSS">, XS,
                                         EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;

defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
                                         SchedWriteFCmp.Scl, "VMAXCSD">, XD,
                                         VEX_W, EVEX_4V, VEX_LIG,
                                         EVEX_CD8<64, CD8VT1>, SIMD_EXC;

multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                            SDPatternOperator MaskOpNode,
                            X86VectorVTInfo _, X86FoldableSchedWrite 
sched, 5460 bit IsCommutable, 5461 bit IsKCommutable = IsCommutable> { 5462 let ExeDomain = _.ExeDomain, hasSideEffects = 0, 5463 Uses = [MXCSR], mayRaiseFPException = 1 in { 5464 defm rr: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst), 5465 (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix, 5466 "$src2, $src1", "$src1, $src2", 5467 (_.VT (OpNode _.RC:$src1, _.RC:$src2)), 5468 (_.VT (MaskOpNode _.RC:$src1, _.RC:$src2)), IsCommutable, 5469 IsKCommutable, IsKCommutable>, 5470 EVEX_4V, Sched<[sched]>; 5471 let mayLoad = 1 in { 5472 defm rm: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst), 5473 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#_.Suffix, 5474 "$src2, $src1", "$src1, $src2", 5475 (OpNode _.RC:$src1, (_.LdFrag addr:$src2)), 5476 (MaskOpNode _.RC:$src1, (_.LdFrag addr:$src2))>, 5477 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>; 5478 defm rmb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst), 5479 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#_.Suffix, 5480 "${src2}"#_.BroadcastStr#", $src1", 5481 "$src1, ${src2}"#_.BroadcastStr, 5482 (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))), 5483 (MaskOpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>, 5484 EVEX_4V, EVEX_B, 5485 Sched<[sched.Folded, sched.ReadAfterFold]>; 5486 } 5487 } 5488} 5489 5490multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr, 5491 SDPatternOperator OpNodeRnd, 5492 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 5493 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in 5494 defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 5495 (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr#_.Suffix, 5496 "$rc, $src2, $src1", "$src1, $src2, $rc", 5497 (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 timm:$rc)))>, 5498 EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>; 5499} 5500 5501multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr, 5502 SDPatternOperator OpNodeSAE, 5503 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 5504 let 
ExeDomain = _.ExeDomain, Uses = [MXCSR] in 5505 defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 5506 (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix, 5507 "{sae}, $src2, $src1", "$src1, $src2, {sae}", 5508 (_.VT (OpNodeSAE _.RC:$src1, _.RC:$src2))>, 5509 EVEX_4V, EVEX_B, Sched<[sched]>; 5510} 5511 5512multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 5513 SDPatternOperator MaskOpNode, 5514 Predicate prd, X86SchedWriteSizes sched, 5515 bit IsCommutable = 0, 5516 bit IsPD128Commutable = IsCommutable> { 5517 let Predicates = [prd] in { 5518 defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f32_info, 5519 sched.PS.ZMM, IsCommutable>, EVEX_V512, PS, 5520 EVEX_CD8<32, CD8VF>; 5521 defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f64_info, 5522 sched.PD.ZMM, IsCommutable>, EVEX_V512, PD, VEX_W, 5523 EVEX_CD8<64, CD8VF>; 5524 } 5525 5526 // Define only if AVX512VL feature is present. 5527 let Predicates = [prd, HasVLX] in { 5528 defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f32x_info, 5529 sched.PS.XMM, IsCommutable>, EVEX_V128, PS, 5530 EVEX_CD8<32, CD8VF>; 5531 defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f32x_info, 5532 sched.PS.YMM, IsCommutable>, EVEX_V256, PS, 5533 EVEX_CD8<32, CD8VF>; 5534 defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v2f64x_info, 5535 sched.PD.XMM, IsPD128Commutable, 5536 IsCommutable>, EVEX_V128, PD, VEX_W, 5537 EVEX_CD8<64, CD8VF>; 5538 defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f64x_info, 5539 sched.PD.YMM, IsCommutable>, EVEX_V256, PD, VEX_W, 5540 EVEX_CD8<64, CD8VF>; 5541 } 5542} 5543 5544let Uses = [MXCSR] in 5545multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd, 5546 X86SchedWriteSizes sched> { 5547 defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM, 5548 v16f32_info>, 5549 EVEX_V512, PS, EVEX_CD8<32, CD8VF>; 
5550 defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM, 5551 v8f64_info>, 5552 EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>; 5553} 5554 5555let Uses = [MXCSR] in 5556multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd, 5557 X86SchedWriteSizes sched> { 5558 defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM, 5559 v16f32_info>, 5560 EVEX_V512, PS, EVEX_CD8<32, CD8VF>; 5561 defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM, 5562 v8f64_info>, 5563 EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>; 5564} 5565 5566defm VADD : avx512_fp_binop_p<0x58, "vadd", any_fadd, fadd, HasAVX512, 5567 SchedWriteFAddSizes, 1>, 5568 avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>; 5569defm VMUL : avx512_fp_binop_p<0x59, "vmul", any_fmul, fmul, HasAVX512, 5570 SchedWriteFMulSizes, 1>, 5571 avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>; 5572defm VSUB : avx512_fp_binop_p<0x5C, "vsub", any_fsub, fsub, HasAVX512, 5573 SchedWriteFAddSizes>, 5574 avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>; 5575defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", any_fdiv, fdiv, HasAVX512, 5576 SchedWriteFDivSizes>, 5577 avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>; 5578defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, X86fmin, HasAVX512, 5579 SchedWriteFCmpSizes, 0>, 5580 avx512_fp_binop_p_sae<0x5D, "vmin", X86fminSAE, SchedWriteFCmpSizes>; 5581defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, X86fmax, HasAVX512, 5582 SchedWriteFCmpSizes, 0>, 5583 avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxSAE, SchedWriteFCmpSizes>; 5584let isCodeGenOnly = 1 in { 5585 defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, X86fminc, HasAVX512, 5586 SchedWriteFCmpSizes, 1>; 5587 defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, X86fmaxc, HasAVX512, 5588 SchedWriteFCmpSizes, 1>; 5589} 5590let Uses = []<Register>, mayRaiseFPException = 0 in { 
5591defm VAND : avx512_fp_binop_p<0x54, "vand", null_frag, null_frag, HasDQI, 5592 SchedWriteFLogicSizes, 1>; 5593defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, null_frag, HasDQI, 5594 SchedWriteFLogicSizes, 0>; 5595defm VOR : avx512_fp_binop_p<0x56, "vor", null_frag, null_frag, HasDQI, 5596 SchedWriteFLogicSizes, 1>; 5597defm VXOR : avx512_fp_binop_p<0x57, "vxor", null_frag, null_frag, HasDQI, 5598 SchedWriteFLogicSizes, 1>; 5599} 5600 5601multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode, 5602 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 5603 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in { 5604 defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 5605 (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix, 5606 "$src2, $src1", "$src1, $src2", 5607 (_.VT (OpNode _.RC:$src1, _.RC:$src2))>, 5608 EVEX_4V, Sched<[sched]>; 5609 defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 5610 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#_.Suffix, 5611 "$src2, $src1", "$src1, $src2", 5612 (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>, 5613 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>; 5614 defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 5615 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#_.Suffix, 5616 "${src2}"#_.BroadcastStr#", $src1", 5617 "$src1, ${src2}"#_.BroadcastStr, 5618 (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>, 5619 EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 5620 } 5621} 5622 5623multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode, 5624 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 5625 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in { 5626 defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 5627 (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix, 5628 "$src2, $src1", "$src1, $src2", 5629 (_.VT (OpNode _.RC:$src1, _.RC:$src2))>, 5630 Sched<[sched]>; 5631 defm rm: 
AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), 5632 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr#_.Suffix, 5633 "$src2, $src1", "$src1, $src2", 5634 (OpNode _.RC:$src1, (_.ScalarIntMemFrags addr:$src2))>, 5635 Sched<[sched.Folded, sched.ReadAfterFold]>; 5636 } 5637} 5638 5639multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr, 5640 X86SchedWriteWidths sched> { 5641 defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v16f32_info>, 5642 avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v16f32_info>, 5643 EVEX_V512, EVEX_CD8<32, CD8VF>; 5644 defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v8f64_info>, 5645 avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v8f64_info>, 5646 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; 5647 defm SSZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f32x_info>, 5648 avx512_fp_scalar_round<opcScaler, OpcodeStr#"ss", f32x_info, 5649 X86scalefsRnd, sched.Scl>, 5650 EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>; 5651 defm SDZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f64x_info>, 5652 avx512_fp_scalar_round<opcScaler, OpcodeStr#"sd", f64x_info, 5653 X86scalefsRnd, sched.Scl>, 5654 EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>, VEX_W; 5655 5656 // Define only if AVX512VL feature is present. 
5657 let Predicates = [HasVLX] in { 5658 defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v4f32x_info>, 5659 EVEX_V128, EVEX_CD8<32, CD8VF>; 5660 defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v8f32x_info>, 5661 EVEX_V256, EVEX_CD8<32, CD8VF>; 5662 defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v2f64x_info>, 5663 EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>; 5664 defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v4f64x_info>, 5665 EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>; 5666 } 5667} 5668defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", 5669 SchedWriteFAdd>, T8PD, NotEVEX2VEXConvertible; 5670 5671//===----------------------------------------------------------------------===// 5672// AVX-512 VPTESTM instructions 5673//===----------------------------------------------------------------------===// 5674 5675multiclass avx512_vptest<bits<8> opc, string OpcodeStr, 5676 X86FoldableSchedWrite sched, X86VectorVTInfo _, 5677 string Name> { 5678 // NOTE: Patterns are omitted in favor of manual selection in X86ISelDAGToDAG. 5679 // There are just too many permutations due to commutability and bitcasts. 
5680 let ExeDomain = _.ExeDomain, hasSideEffects = 0 in { 5681 defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst), 5682 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 5683 "$src2, $src1", "$src1, $src2", 5684 (null_frag), (null_frag), 1>, 5685 EVEX_4V, Sched<[sched]>; 5686 let mayLoad = 1 in 5687 defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst), 5688 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr, 5689 "$src2, $src1", "$src1, $src2", 5690 (null_frag), (null_frag)>, 5691 EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, 5692 Sched<[sched.Folded, sched.ReadAfterFold]>; 5693 } 5694} 5695 5696multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr, 5697 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 5698 let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in 5699 defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst), 5700 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr, 5701 "${src2}"#_.BroadcastStr#", $src1", 5702 "$src1, ${src2}"#_.BroadcastStr, 5703 (null_frag), (null_frag)>, 5704 EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, 5705 Sched<[sched.Folded, sched.ReadAfterFold]>; 5706} 5707 5708multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr, 5709 X86SchedWriteWidths sched, 5710 AVX512VLVectorVTInfo _> { 5711 let Predicates = [HasAVX512] in 5712 defm Z : avx512_vptest<opc, OpcodeStr, sched.ZMM, _.info512, NAME>, 5713 avx512_vptest_mb<opc, OpcodeStr, sched.ZMM, _.info512>, EVEX_V512; 5714 5715 let Predicates = [HasAVX512, HasVLX] in { 5716 defm Z256 : avx512_vptest<opc, OpcodeStr, sched.YMM, _.info256, NAME>, 5717 avx512_vptest_mb<opc, OpcodeStr, sched.YMM, _.info256>, EVEX_V256; 5718 defm Z128 : avx512_vptest<opc, OpcodeStr, sched.XMM, _.info128, NAME>, 5719 avx512_vptest_mb<opc, OpcodeStr, sched.XMM, _.info128>, EVEX_V128; 5720 } 5721} 5722 5723multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr, 5724 X86SchedWriteWidths sched> { 5725 defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", sched, 5726 
avx512vl_i32_info>; 5727 defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", sched, 5728 avx512vl_i64_info>, VEX_W; 5729} 5730 5731multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr, 5732 X86SchedWriteWidths sched> { 5733 let Predicates = [HasBWI] in { 5734 defm WZ: avx512_vptest<opc, OpcodeStr#"w", sched.ZMM, 5735 v32i16_info, NAME#"W">, EVEX_V512, VEX_W; 5736 defm BZ: avx512_vptest<opc, OpcodeStr#"b", sched.ZMM, 5737 v64i8_info, NAME#"B">, EVEX_V512; 5738 } 5739 let Predicates = [HasVLX, HasBWI] in { 5740 5741 defm WZ256: avx512_vptest<opc, OpcodeStr#"w", sched.YMM, 5742 v16i16x_info, NAME#"W">, EVEX_V256, VEX_W; 5743 defm WZ128: avx512_vptest<opc, OpcodeStr#"w", sched.XMM, 5744 v8i16x_info, NAME#"W">, EVEX_V128, VEX_W; 5745 defm BZ256: avx512_vptest<opc, OpcodeStr#"b", sched.YMM, 5746 v32i8x_info, NAME#"B">, EVEX_V256; 5747 defm BZ128: avx512_vptest<opc, OpcodeStr#"b", sched.XMM, 5748 v16i8x_info, NAME#"B">, EVEX_V128; 5749 } 5750} 5751 5752multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr, 5753 X86SchedWriteWidths sched> : 5754 avx512_vptest_wb<opc_wb, OpcodeStr, sched>, 5755 avx512_vptest_dq<opc_dq, OpcodeStr, sched>; 5756 5757defm VPTESTM : avx512_vptest_all_forms<0x26, 0x27, "vptestm", 5758 SchedWriteVecLogic>, T8PD; 5759defm VPTESTNM : avx512_vptest_all_forms<0x26, 0x27, "vptestnm", 5760 SchedWriteVecLogic>, T8XS; 5761 5762//===----------------------------------------------------------------------===// 5763// AVX-512 Shift instructions 5764//===----------------------------------------------------------------------===// 5765 5766multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM, 5767 string OpcodeStr, SDNode OpNode, 5768 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 5769 let ExeDomain = _.ExeDomain in { 5770 defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst), 5771 (ins _.RC:$src1, u8imm:$src2), OpcodeStr, 5772 "$src2, $src1", "$src1, $src2", 5773 (_.VT (OpNode _.RC:$src1, (i8 
timm:$src2)))>, 5774 Sched<[sched]>; 5775 defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst), 5776 (ins _.MemOp:$src1, u8imm:$src2), OpcodeStr, 5777 "$src2, $src1", "$src1, $src2", 5778 (_.VT (OpNode (_.VT (_.LdFrag addr:$src1)), 5779 (i8 timm:$src2)))>, 5780 Sched<[sched.Folded]>; 5781 } 5782} 5783 5784multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM, 5785 string OpcodeStr, SDNode OpNode, 5786 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 5787 let ExeDomain = _.ExeDomain in 5788 defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst), 5789 (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr, 5790 "$src2, ${src1}"#_.BroadcastStr, "${src1}"#_.BroadcastStr#", $src2", 5791 (_.VT (OpNode (_.BroadcastLdFrag addr:$src1), (i8 timm:$src2)))>, 5792 EVEX_B, Sched<[sched.Folded]>; 5793} 5794 5795multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode, 5796 X86FoldableSchedWrite sched, ValueType SrcVT, 5797 X86VectorVTInfo _> { 5798 // src2 is always 128-bit 5799 let ExeDomain = _.ExeDomain in { 5800 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 5801 (ins _.RC:$src1, VR128X:$src2), OpcodeStr, 5802 "$src2, $src1", "$src1, $src2", 5803 (_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2)))>, 5804 AVX512BIBase, EVEX_4V, Sched<[sched]>; 5805 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 5806 (ins _.RC:$src1, i128mem:$src2), OpcodeStr, 5807 "$src2, $src1", "$src1, $src2", 5808 (_.VT (OpNode _.RC:$src1, (SrcVT (load addr:$src2))))>, 5809 AVX512BIBase, 5810 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>; 5811 } 5812} 5813 5814multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode, 5815 X86SchedWriteWidths sched, ValueType SrcVT, 5816 AVX512VLVectorVTInfo VTInfo, 5817 Predicate prd> { 5818 let Predicates = [prd] in 5819 defm Z : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.ZMM, SrcVT, 5820 VTInfo.info512>, EVEX_V512, 5821 EVEX_CD8<VTInfo.info512.EltSize, CD8VQ> ; 5822 let Predicates = 
[prd, HasVLX] in { 5823 defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.YMM, SrcVT, 5824 VTInfo.info256>, EVEX_V256, 5825 EVEX_CD8<VTInfo.info256.EltSize, CD8VH>; 5826 defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.XMM, SrcVT, 5827 VTInfo.info128>, EVEX_V128, 5828 EVEX_CD8<VTInfo.info128.EltSize, CD8VF>; 5829 } 5830} 5831 5832multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw, 5833 string OpcodeStr, SDNode OpNode, 5834 X86SchedWriteWidths sched, 5835 bit NotEVEX2VEXConvertibleQ = 0> { 5836 defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, sched, v4i32, 5837 avx512vl_i32_info, HasAVX512>; 5838 let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in 5839 defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, sched, v2i64, 5840 avx512vl_i64_info, HasAVX512>, VEX_W; 5841 defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, sched, v8i16, 5842 avx512vl_i16_info, HasBWI>; 5843} 5844 5845multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM, 5846 string OpcodeStr, SDNode OpNode, 5847 X86SchedWriteWidths sched, 5848 AVX512VLVectorVTInfo VTInfo> { 5849 let Predicates = [HasAVX512] in 5850 defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, 5851 sched.ZMM, VTInfo.info512>, 5852 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.ZMM, 5853 VTInfo.info512>, EVEX_V512; 5854 let Predicates = [HasAVX512, HasVLX] in { 5855 defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, 5856 sched.YMM, VTInfo.info256>, 5857 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.YMM, 5858 VTInfo.info256>, EVEX_V256; 5859 defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, 5860 sched.XMM, VTInfo.info128>, 5861 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.XMM, 5862 VTInfo.info128>, EVEX_V128; 5863 } 5864} 5865 5866multiclass avx512_shift_rmi_w<bits<8> opcw, Format ImmFormR, Format ImmFormM, 5867 string OpcodeStr, SDNode OpNode, 5868 
X86SchedWriteWidths sched> { 5869 let Predicates = [HasBWI] in 5870 defm WZ: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode, 5871 sched.ZMM, v32i16_info>, EVEX_V512, VEX_WIG; 5872 let Predicates = [HasVLX, HasBWI] in { 5873 defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode, 5874 sched.YMM, v16i16x_info>, EVEX_V256, VEX_WIG; 5875 defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode, 5876 sched.XMM, v8i16x_info>, EVEX_V128, VEX_WIG; 5877 } 5878} 5879 5880multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq, 5881 Format ImmFormR, Format ImmFormM, 5882 string OpcodeStr, SDNode OpNode, 5883 X86SchedWriteWidths sched, 5884 bit NotEVEX2VEXConvertibleQ = 0> { 5885 defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode, 5886 sched, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; 5887 let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in 5888 defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode, 5889 sched, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W; 5890} 5891 5892defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli, 5893 SchedWriteVecShiftImm>, 5894 avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli, 5895 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V; 5896 5897defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli, 5898 SchedWriteVecShiftImm>, 5899 avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli, 5900 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V; 5901 5902defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai, 5903 SchedWriteVecShiftImm, 1>, 5904 avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai, 5905 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V; 5906 5907defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri, 5908 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V; 5909defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", 
X86vrotli, 5910 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V; 5911 5912defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl, 5913 SchedWriteVecShift>; 5914defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra, 5915 SchedWriteVecShift, 1>; 5916defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl, 5917 SchedWriteVecShift>; 5918 5919// Use 512bit VPSRA/VPSRAI version to implement v2i64/v4i64 in case NoVLX. 5920let Predicates = [HasAVX512, NoVLX] in { 5921 def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))), 5922 (EXTRACT_SUBREG (v8i64 5923 (VPSRAQZrr 5924 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 5925 VR128X:$src2)), sub_ymm)>; 5926 5927 def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))), 5928 (EXTRACT_SUBREG (v8i64 5929 (VPSRAQZrr 5930 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 5931 VR128X:$src2)), sub_xmm)>; 5932 5933 def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 timm:$src2))), 5934 (EXTRACT_SUBREG (v8i64 5935 (VPSRAQZri 5936 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 5937 timm:$src2)), sub_ymm)>; 5938 5939 def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 timm:$src2))), 5940 (EXTRACT_SUBREG (v8i64 5941 (VPSRAQZri 5942 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 5943 timm:$src2)), sub_xmm)>; 5944} 5945 5946//===-------------------------------------------------------------------===// 5947// Variable Bit Shifts 5948//===-------------------------------------------------------------------===// 5949 5950multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode, 5951 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 5952 let ExeDomain = _.ExeDomain in { 5953 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 5954 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 5955 "$src2, $src1", "$src1, $src2", 5956 (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2)))>, 5957 AVX5128IBase, EVEX_4V, 
Sched<[sched]>; 5958 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 5959 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr, 5960 "$src2, $src1", "$src1, $src2", 5961 (_.VT (OpNode _.RC:$src1, 5962 (_.VT (_.LdFrag addr:$src2))))>, 5963 AVX5128IBase, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, 5964 Sched<[sched.Folded, sched.ReadAfterFold]>; 5965 } 5966} 5967 5968multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode, 5969 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 5970 let ExeDomain = _.ExeDomain in 5971 defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 5972 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr, 5973 "${src2}"#_.BroadcastStr#", $src1", 5974 "$src1, ${src2}"#_.BroadcastStr, 5975 (_.VT (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))>, 5976 AVX5128IBase, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, 5977 Sched<[sched.Folded, sched.ReadAfterFold]>; 5978} 5979 5980multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode, 5981 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> { 5982 let Predicates = [HasAVX512] in 5983 defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, 5984 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512; 5985 5986 let Predicates = [HasAVX512, HasVLX] in { 5987 defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, 5988 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256; 5989 defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, 5990 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128; 5991 } 5992} 5993 5994multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr, 5995 SDNode OpNode, X86SchedWriteWidths sched> { 5996 defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode, sched, 5997 avx512vl_i32_info>; 5998 defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode, sched, 5999 avx512vl_i64_info>, VEX_W; 6000} 
// Use the 512-bit instruction to implement the 128/256-bit forms when VLX is
// unavailable (NoVLX): widen both operands into ZMM via INSERT_SUBREG, run the
// Zrr instruction, then extract the low xmm/ymm subregister.
multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr,
                                     SDNode OpNode, list<Predicate> p> {
  let Predicates = p in {
    def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1),
                                    (_.info256.VT _.info256.RC:$src2))),
              (EXTRACT_SUBREG
                (!cast<Instruction>(OpcodeStr#"Zrr")
                  (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                  (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
                sub_ymm)>;

    def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1),
                                    (_.info128.VT _.info128.RC:$src2))),
              (EXTRACT_SUBREG
                (!cast<Instruction>(OpcodeStr#"Zrr")
                  (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                  (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
                sub_xmm)>;
  }
}

// Word-element (i16) variable shifts. These require BWI, plus VLX for the
// 128/256-bit variants.
multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
                              SDNode OpNode, X86SchedWriteWidths sched> {
  let Predicates = [HasBWI] in
  defm WZ: avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v32i16_info>,
           EVEX_V512, VEX_W;
  let Predicates = [HasVLX, HasBWI] in {
    defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v16i16x_info>,
                EVEX_V256, VEX_W;
    defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v8i16x_info>,
                EVEX_V128, VEX_W;
  }
}

defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", X86vshlv, SchedWriteVarVecShift>,
              avx512_var_shift_w<0x12, "vpsllvw", X86vshlv, SchedWriteVarVecShift>;

defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", X86vsrav, SchedWriteVarVecShift>,
              avx512_var_shift_w<0x11, "vpsravw", X86vsrav, SchedWriteVarVecShift>;

defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", X86vsrlv, SchedWriteVarVecShift>,
              avx512_var_shift_w<0x10, "vpsrlvw", X86vsrlv, SchedWriteVarVecShift>;

defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SchedWriteVarVecShift>;
defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SchedWriteVarVecShift>;

defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", X86vsrav, [HasAVX512, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", X86vshlv, [HasBWI, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", X86vsrav, [HasBWI, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", X86vsrlv, [HasBWI, NoVLX]>;

// Use the 512-bit VPROLV/VPROLI forms to implement v2i64/v4i64 and
// v4i32/v8i32 rotate-left when VLX is unavailable.
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
              sub_xmm)>;
  def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
              sub_ymm)>;

  def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPROLVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
              sub_xmm)>;
  def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPROLVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
              sub_ymm)>;

  def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                timm:$src2)), sub_xmm)>;
  def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                timm:$src2)), sub_ymm)>;

  def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPROLDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                timm:$src2)), sub_xmm)>;
  def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPROLDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                timm:$src2)), sub_ymm)>;
}

// Use the 512-bit VPRORV/VPRORI forms to implement v2i64/v4i64 and
// v4i32/v8i32 rotate-right when VLX is unavailable.
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPRORVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
              sub_xmm)>;
  def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPRORVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
              sub_ymm)>;

  def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPRORVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
              sub_xmm)>;
  def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPRORVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
              sub_ymm)>;

  def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPRORQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                timm:$src2)), sub_xmm)>;
  def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPRORQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                timm:$src2)), sub_ymm)>;

  def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPRORDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                timm:$src2)), sub_xmm)>;
  def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPRORDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                timm:$src2)), sub_ymm)>;
}

//===-------------------------------------------------------------------===//
// 1-src variable permutation VPERMW/D/Q
//===-------------------------------------------------------------------===//

// Dword/qword-element variable permutes: 512-bit always; 256-bit only with
// VLX. There is intentionally no 128-bit form for these.
multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
           avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in
  defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info256>, EVEX_V256;
}

// Immediate-controlled permutes (vpermq/vpermpd $imm forms).
multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
                                  string OpcodeStr, SDNode OpNode,
                                  X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasAVX512] in
  defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              sched, VTInfo.info512>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
                               sched, VTInfo.info512>, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in
  defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              sched, VTInfo.info256>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
                               sched, VTInfo.info256>, EVEX_V256;
}

// Byte/word-element variable permutes, gated on a caller-supplied predicate
// (BWI for vpermw, VBMI for vpermb).
multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr,
                           Predicate prd, SDNode OpNode,
                           X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
  let Predicates = [prd] in
  defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
          EVEX_V512;
  let Predicates = [HasVLX, prd] in {
    defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
               EVEX_V256;
    defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info128>,
               EVEX_V128;
  }
}

defm VPERMW : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv,
                              WriteVarShuffle256, avx512vl_i16_info>, VEX_W;
defm VPERMB : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv,
                              WriteVarShuffle256, avx512vl_i8_info>;

defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv,
                                    WriteVarShuffle256, avx512vl_i32_info>;
defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv,
                                    WriteVarShuffle256, avx512vl_i64_info>, VEX_W;
defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv,
                                     WriteFVarShuffle256, avx512vl_f32_info>;
defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv,
                                     WriteFVarShuffle256, avx512vl_f64_info>, VEX_W;

defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq",
                                     X86VPermi, WriteShuffle256, avx512vl_i64_info>,
                                     EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd",
                                      X86VPermi, WriteFShuffle256, avx512vl_f64_info>,
                                      EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
//===----------------------------------------------------------------------===//
// AVX-512 - VPERMIL
//===----------------------------------------------------------------------===//

// Variable-control VPERMILPS/PD: register, memory, and embedded-broadcast
// forms. The control vector is an integer type (Ctrl), the data is FP (_).
multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _,
                             X86VectorVTInfo Ctrl> {
  defm rr: AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, Ctrl.RC:$src2), OpcodeStr,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1,
                                (Ctrl.VT Ctrl.RC:$src2)))>,
                  T8PD, EVEX_4V, Sched<[sched]>;
  defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode
                           _.RC:$src1,
                           (Ctrl.VT (Ctrl.LdFrag addr:$src2))))>,
                  T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                   "${src2}"#_.BroadcastStr#", $src1",
                   "$src1, ${src2}"#_.BroadcastStr,
                   (_.VT (OpNode
                            _.RC:$src1,
                            (Ctrl.VT (Ctrl.BroadcastLdFrag addr:$src2))))>,
                   T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// Instantiate the variable-control form at all three vector widths.
multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
                                    X86SchedWriteWidths sched,
                                    AVX512VLVectorVTInfo _,
                                    AVX512VLVectorVTInfo Ctrl> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.ZMM,
                               _.info512, Ctrl.info512>, EVEX_V512;
  }
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.XMM,
                                  _.info128, Ctrl.info128>, EVEX_V128;
    defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.YMM,
                                  _.info256, Ctrl.info256>, EVEX_V256;
  }
}

// Combine the variable-control and immediate-control VPERMIL forms under a
// single NAME.
multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
                         AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{
  defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, SchedWriteFVarShuffle,
                                      _, Ctrl>;
  defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
                                    X86VPermilpi, SchedWriteFShuffle, _>,
             EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
}

let ExeDomain = SSEPackedSingle in
defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info,
                               avx512vl_i32_info>;
let ExeDomain = SSEPackedDouble in
defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
                               avx512vl_i64_info>, VEX_W1X;

//===----------------------------------------------------------------------===//
// AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW
//===----------------------------------------------------------------------===//

defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
                                      X86PShufd, SchedWriteShuffle, avx512vl_i32_info>,
                                      EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;
defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
                                  X86PShufhw, SchedWriteShuffle>,
                                  EVEX, AVX512XSIi8Base;
defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
                                  X86PShuflw, SchedWriteShuffle>,
                                  EVEX, AVX512XDIi8Base;

//===----------------------------------------------------------------------===//
// AVX-512 - VPSHUFB
//===----------------------------------------------------------------------===//

multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86SchedWriteWidths sched> {
  let Predicates = [HasBWI] in
  defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v64i8_info>,
          EVEX_V512;

  let Predicates = [HasVLX, HasBWI] in {
    defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v32i8x_info>,
               EVEX_V256;
    defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v16i8x_info>,
               EVEX_V128;
  }
}

defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb,
                                  SchedWriteVarShuffle>, VEX_WIG;

//===----------------------------------------------------------------------===//
// Move Low to High and High to Low packed FP Instructions
//===----------------------------------------------------------------------===//

def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
                            (ins VR128X:$src1, VR128X:$src2),
                            "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                            [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))]>,
                            Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V;
let isCommutable = 1 in
def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
                            (ins VR128X:$src1, VR128X:$src2),
                            "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                            [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))]>,
                            Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V, NotMemoryFoldable;

//===----------------------------------------------------------------------===//
// VMOVHPS/PD VMOVLPS Instructions
// All patterns were taken from the SSE implementation.
//===----------------------------------------------------------------------===//

multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr,
                                  SDPatternOperator OpNode,
                                  X86VectorVTInfo _> {
  let hasSideEffects = 0, mayLoad = 1, ExeDomain = _.ExeDomain in
  def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst),
                  (ins _.RC:$src1, f64mem:$src2),
                  !strconcat(OpcodeStr,
                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set _.RC:$dst,
                        (OpNode _.RC:$src1,
                                (_.VT (bitconvert
                                  (v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))]>,
                  Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>, EVEX_4V;
}

// No patterns for MOVLPS/MOVHPS as the Movlhps node should only be created in
// SSE1. And the MOVLPS pattern is even more complex.
defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", null_frag,
                                          v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl,
                                          v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", null_frag,
                                          v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movsd,
                                          v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;

let Predicates = [HasAVX512] in {
  // VMOVHPD patterns
  def : Pat<(v2f64 (X86Unpckl VR128X:$src1, (X86vzload64 addr:$src2))),
            (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;

  // VMOVLPD patterns
  def : Pat<(v2f64 (X86Movsd VR128X:$src1, (X86vzload64 addr:$src2))),
            (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
}

let SchedRW = [WriteFStore] in {
let mayStore = 1, hasSideEffects = 0 in
def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs),
                              (ins f64mem:$dst, VR128X:$src),
                              "vmovhps\t{$src, $dst|$dst, $src}",
                              []>, EVEX, EVEX_CD8<32, CD8VT2>;
def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
                              (ins f64mem:$dst, VR128X:$src),
                              "vmovhpd\t{$src, $dst|$dst, $src}",
                              [(store (f64 (extractelt
                                             (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)),
                                             (iPTR 0))), addr:$dst)]>,
                              EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
let mayStore = 1, hasSideEffects = 0 in
def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs),
                              (ins f64mem:$dst, VR128X:$src),
                              "vmovlps\t{$src, $dst|$dst, $src}",
                              []>, EVEX, EVEX_CD8<32, CD8VT2>;
def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs),
                              (ins f64mem:$dst, VR128X:$src),
                              "vmovlpd\t{$src, $dst|$dst, $src}",
                              [(store (f64 (extractelt (v2f64 VR128X:$src),
                                             (iPTR 0))), addr:$dst)]>,
                              EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
} // SchedRW

let Predicates = [HasAVX512] in {
  // VMOVHPD patterns
  def : Pat<(store (f64 (extractelt
                          (v2f64 (X86VPermilpi VR128X:$src, (i8 1))),
                          (iPTR 0))), addr:$dst),
            (VMOVHPDZ128mr addr:$dst, VR128X:$src)>;
}
//===----------------------------------------------------------------------===//
// FMA - Fused Multiply Operations
//

// 213-form packed FMA: reg/reg, reg/mem and reg/broadcast variants.
// $src1 is tied to $dst; the DAG operands are permuted to match the 213
// operand ordering of the instruction.
multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               SDNode MaskOpNode, X86FoldableSchedWrite sched,
                               X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
      Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)),
          (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>,
          AVX512FMA3Base, Sched<[sched]>;

  defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))),
          (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>,
          AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.ScalarMemOp:$src3),
           OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
           !strconcat("$src2, ${src3}", _.BroadcastStr ),
           (OpNode _.RC:$src2,
                   _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))),
           (MaskOpNode _.RC:$src2,
                       _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))), 1, 0>,
           AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// 213-form with static rounding control (512-bit only).
multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched,
                                 X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
      Uses = [MXCSR] in
  defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
           (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))),
           (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))), 1, 1>,
           AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
}

// Instantiate 213-form at all widths; rounding only exists at 512 bits.
multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   SDNode MaskOpNode, SDNode OpNodeRnd,
                                   X86SchedWriteWidths sched,
                                   AVX512VLVectorVTInfo _, string Suff> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                 sched.ZMM, _.info512, Suff>,
             avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
                                   _.info512, Suff>,
             EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.YMM, _.info256, Suff>,
                EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.XMM, _.info128, Suff>,
                EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}

// PS and PD flavors of a 213-form FMA opcode.
multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDNode MaskOpNode, SDNode OpNodeRnd> {
  defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
                                    OpNodeRnd, SchedWriteFMA,
                                    avx512vl_f32_info, "PS">;
  defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
                                    OpNodeRnd, SchedWriteFMA,
                                    avx512vl_f64_info, "PD">, VEX_W;
}

defm VFMADD213    : avx512_fma3p_213_f<0xA8, "vfmadd213", X86any_Fmadd,
                                       X86Fmadd, X86FmaddRnd>;
defm VFMSUB213    : avx512_fma3p_213_f<0xAA, "vfmsub213", X86any_Fmsub,
                                       X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub,
                                       X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd,
                                       X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD213   : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86any_Fnmadd,
                                       X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB213   : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86any_Fnmsub,
                                       X86Fnmsub, X86FnmsubRnd>;


// 231-form packed FMA. The unmasked register form has no pattern
// (null_frag); isel prefers the 213 form and converts as needed.
multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               SDNode MaskOpNode, X86FoldableSchedWrite sched,
                               X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
      Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (null_frag),
          (_.VT (MaskOpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
          AVX512FMA3Base, Sched<[sched]>;

  defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)),
          (_.VT (MaskOpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>,
          AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.ScalarMemOp:$src3),
           OpcodeStr, "${src3}"#_.BroadcastStr#", $src2",
           "$src2, ${src3}"#_.BroadcastStr,
           (_.VT (OpNode _.RC:$src2,
                         (_.VT (_.BroadcastLdFrag addr:$src3)),
                         _.RC:$src1)),
           (_.VT (MaskOpNode _.RC:$src2,
                             (_.VT (_.BroadcastLdFrag addr:$src3)),
                             _.RC:$src1)), 1, 0>, AVX512FMA3Base, EVEX_B,
           Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// 231-form with static rounding control (512-bit only).
multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched,
                                 X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
      Uses = [MXCSR] in
  defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
           (null_frag),
           (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc))),
           1, 1>, AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
}

// Instantiate 231-form at all widths; rounding only exists at 512 bits.
multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   SDNode MaskOpNode, SDNode OpNodeRnd,
                                   X86SchedWriteWidths sched,
                                   AVX512VLVectorVTInfo _, string Suff> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                 sched.ZMM, _.info512, Suff>,
             avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
                                   _.info512, Suff>,
             EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.YMM, _.info256, Suff>,
                EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.XMM, _.info128, Suff>,
                EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}

// PS and PD flavors of a 231-form FMA opcode.
multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDNode MaskOpNode, SDNode OpNodeRnd > {
  defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
                                    OpNodeRnd, SchedWriteFMA,
                                    avx512vl_f32_info, "PS">;
  defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
                                    OpNodeRnd, SchedWriteFMA,
                                    avx512vl_f64_info, "PD">, VEX_W;
}

defm VFMADD231 : avx512_fma3p_231_f<0xB8, "vfmadd231", X86any_Fmadd,
                                    X86Fmadd, X86FmaddRnd>;
defm VFMSUB231 : avx512_fma3p_231_f<0xBA, "vfmsub231", X86any_Fmsub,
                                    X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub,
                                       X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd,
                                       X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD231   : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86any_Fnmadd,
                                       X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB231   : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86any_Fnmsub,
                                       X86Fnmsub, X86FnmsubRnd>;

// 132-form packed FMA. Unmasked register form has no pattern (null_frag).
multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               SDNode MaskOpNode, X86FoldableSchedWrite sched,
                               X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
      Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (null_frag),
          (_.VT (MaskOpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1>,
          AVX512FMA3Base, Sched<[sched]>;

  // Pattern is 312 order so that the load is in a different place from the
  // 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
  defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)),
          (_.VT (MaskOpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>,
          AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;

  // Pattern is 312 order so that the load is in a different place from the
  // 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
  defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.ScalarMemOp:$src3),
           OpcodeStr, "${src3}"#_.BroadcastStr#", $src2",
           "$src2, ${src3}"#_.BroadcastStr,
           (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
                         _.RC:$src1, _.RC:$src2)),
           (_.VT (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
                             _.RC:$src1, _.RC:$src2)), 1, 0>,
           AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// 132-form with static rounding control (512-bit only).
multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched,
                                 X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
      Uses = [MXCSR] in
  defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
           (null_frag),
           (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 timm:$rc))),
           1, 1>, AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
}

// Instantiate 132-form at all widths; rounding only exists at 512 bits.
multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   SDNode MaskOpNode, SDNode OpNodeRnd,
                                   X86SchedWriteWidths sched,
                                   AVX512VLVectorVTInfo _, string Suff> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                 sched.ZMM, _.info512, Suff>,
             avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
                                   _.info512, Suff>,
             EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.YMM, _.info256, Suff>,
                EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.XMM, _.info128, Suff>,
                EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}

// PS and PD flavors of a 132-form FMA opcode.
multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDNode MaskOpNode, SDNode OpNodeRnd > {
  defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
                                    OpNodeRnd, SchedWriteFMA,
                                    avx512vl_f32_info, "PS">;
  defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
                                    OpNodeRnd, SchedWriteFMA,
                                    avx512vl_f64_info, "PD">, VEX_W;
}

defm VFMADD132    : avx512_fma3p_132_f<0x98, "vfmadd132", X86any_Fmadd,
                                       X86Fmadd, X86FmaddRnd>;
defm VFMSUB132    : avx512_fma3p_132_f<0x9A, "vfmsub132", X86any_Fmsub,
                                       X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub,
                                       X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd,
                                       X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD132   : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86any_Fnmadd,
                                       X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB132   : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86any_Fnmsub,
                                       X86Fnmsub, X86FnmsubRnd>;

// Scalar FMA
// Shared machinery for the scalar (ss/sd) FMA forms: intrinsic (_Int)
// variants plus isCodeGenOnly FRC-register variants that carry the patterns.
multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                               dag RHS_r, dag RHS_m, dag RHS_b, bit MaskOnlyReg> {
let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
  defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
              (ins _.RC:$src2, _.RC:$src3), OpcodeStr,
              "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
              AVX512FMA3Base, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC;

  let mayLoad = 1 in
  defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
              (ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
              "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
              AVX512FMA3Base, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC;

  let Uses = [MXCSR] in
  defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
               (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
               OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (null_frag), 1, 1>,
               AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>;

  let isCodeGenOnly = 1, isCommutable = 1 in {
    def r : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
                        (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
                        !strconcat(OpcodeStr,
                                   "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                        !if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC;
    def m : AVX512FMA3S<opc, MRMSrcMem, (outs _.FRC:$dst),
                        (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
                        !strconcat(OpcodeStr,
                                   "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                        [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC;

    let Uses = [MXCSR] in
    def rb : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3, AVX512RC:$rc),
                         !strconcat(OpcodeStr,
                                    "\t{$rc, $src3, $src2, $dst|$dst, $src2, $src3, $rc}"),
                         !if(MaskOnlyReg, [], [RHS_b])>, EVEX_B, EVEX_RC,
                         Sched<[SchedWriteFMA.Scl]>;
  }// isCodeGenOnly = 1
}// Constraints = "$src1 = $dst"
}

// Build the 213/231/132 scalar variants of one FMA operation.
multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
                            string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd,
                            X86VectorVTInfo _, string SUFF> {
  let ExeDomain = _.ExeDomain in {
  defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix, _,
                // Operands for intrinsic are in 123 order to preserve
                // passthrough semantics.
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
                         _.FRC:$src3))),
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
                         (_.ScalarLdFrag addr:$src3)))),
                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src1,
                         _.FRC:$src3, (i32 timm:$rc)))), 0>;

  defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix, _,
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
                         _.FRC:$src1))),
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
                         (_.ScalarLdFrag addr:$src3), _.FRC:$src1))),
                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src3,
                         _.FRC:$src1, (i32 timm:$rc)))), 1>;

  // One pattern is 312 order so that the load is in a different place from
  // the 213 and 231 patterns; this helps tablegen's duplicate pattern
  // detection.
  defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix, _,
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3,
                         _.FRC:$src2))),
                (set _.FRC:$dst, (_.EltVT (OpNode (_.ScalarLdFrag addr:$src3),
                         _.FRC:$src1, _.FRC:$src2))),
                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src1, _.FRC:$src3,
                         _.FRC:$src2, (i32 timm:$rc)))), 1>;
  }
}

// SS and SD flavors of one scalar FMA operation.
multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
                        string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd> {
  let Predicates = [HasAVX512] in {
    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
                                 OpNodeRnd, f32x_info, "SS">,
                                 EVEX_CD8<32, CD8VT1>, VEX_LIG;
    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
                                 OpNodeRnd, f64x_info, "SD">,
                                 EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
  }
}

defm VFMADD  : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", X86any_Fmadd, X86FmaddRnd>;
defm VFMSUB  : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86any_Fmsub, X86FmsubRnd>;
defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86any_Fnmadd, X86FnmaddRnd>;
defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86any_Fnmsub, X86FnmsubRnd>;

multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode MaskedOp,
                                      SDNode RndOp, string Prefix,
                                      string Suffix, SDNode Move,
                                      X86VectorVTInfo _, PatLeaf ZeroFP> {
  let Predicates = [HasAVX512] in {
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op _.FRC:$src2,
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                    _.FRC:$src3))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zr_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op _.FRC:$src2, _.FRC:$src3,
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zr_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op _.FRC:$src2,
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                    (_.ScalarLdFrag addr:$src3)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zm_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                    (_.ScalarLdFrag addr:$src3), _.FRC:$src2))))),
              (!cast<I>(Prefix#"132"#Suffix#"Zm_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zm_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               addr:$src3)>;

    def :
Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 6824 (X86selects_mask VK1WM:$mask, 6825 (MaskedOp _.FRC:$src2, 6826 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), 6827 _.FRC:$src3), 6828 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))), 6829 (!cast<I>(Prefix#"213"#Suffix#"Zr_Intk") 6830 VR128X:$src1, VK1WM:$mask, 6831 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), 6832 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>; 6833 6834 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 6835 (X86selects_mask VK1WM:$mask, 6836 (MaskedOp _.FRC:$src2, 6837 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), 6838 (_.ScalarLdFrag addr:$src3)), 6839 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))), 6840 (!cast<I>(Prefix#"213"#Suffix#"Zm_Intk") 6841 VR128X:$src1, VK1WM:$mask, 6842 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>; 6843 6844 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 6845 (X86selects_mask VK1WM:$mask, 6846 (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), 6847 (_.ScalarLdFrag addr:$src3), _.FRC:$src2), 6848 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))), 6849 (!cast<I>(Prefix#"132"#Suffix#"Zm_Intk") 6850 VR128X:$src1, VK1WM:$mask, 6851 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>; 6852 6853 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 6854 (X86selects_mask VK1WM:$mask, 6855 (MaskedOp _.FRC:$src2, _.FRC:$src3, 6856 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))), 6857 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))), 6858 (!cast<I>(Prefix#"231"#Suffix#"Zr_Intk") 6859 VR128X:$src1, VK1WM:$mask, 6860 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), 6861 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>; 6862 6863 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 6864 (X86selects_mask VK1WM:$mask, 6865 (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3), 6866 (_.EltVT (extractelt (_.VT 
VR128X:$src1), (iPTR 0)))), 6867 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))), 6868 (!cast<I>(Prefix#"231"#Suffix#"Zm_Intk") 6869 VR128X:$src1, VK1WM:$mask, 6870 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>; 6871 6872 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 6873 (X86selects_mask VK1WM:$mask, 6874 (MaskedOp _.FRC:$src2, 6875 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), 6876 _.FRC:$src3), 6877 (_.EltVT ZeroFP)))))), 6878 (!cast<I>(Prefix#"213"#Suffix#"Zr_Intkz") 6879 VR128X:$src1, VK1WM:$mask, 6880 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), 6881 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>; 6882 6883 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 6884 (X86selects_mask VK1WM:$mask, 6885 (MaskedOp _.FRC:$src2, _.FRC:$src3, 6886 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))), 6887 (_.EltVT ZeroFP)))))), 6888 (!cast<I>(Prefix#"231"#Suffix#"Zr_Intkz") 6889 VR128X:$src1, VK1WM:$mask, 6890 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), 6891 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>; 6892 6893 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 6894 (X86selects_mask VK1WM:$mask, 6895 (MaskedOp _.FRC:$src2, 6896 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), 6897 (_.ScalarLdFrag addr:$src3)), 6898 (_.EltVT ZeroFP)))))), 6899 (!cast<I>(Prefix#"213"#Suffix#"Zm_Intkz") 6900 VR128X:$src1, VK1WM:$mask, 6901 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>; 6902 6903 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 6904 (X86selects_mask VK1WM:$mask, 6905 (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), 6906 _.FRC:$src2, (_.ScalarLdFrag addr:$src3)), 6907 (_.EltVT ZeroFP)))))), 6908 (!cast<I>(Prefix#"132"#Suffix#"Zm_Intkz") 6909 VR128X:$src1, VK1WM:$mask, 6910 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>; 6911 6912 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 6913 
(X86selects_mask VK1WM:$mask, 6914 (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3), 6915 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))), 6916 (_.EltVT ZeroFP)))))), 6917 (!cast<I>(Prefix#"231"#Suffix#"Zm_Intkz") 6918 VR128X:$src1, VK1WM:$mask, 6919 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>; 6920 6921 // Patterns with rounding mode. 6922 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 6923 (RndOp _.FRC:$src2, 6924 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), 6925 _.FRC:$src3, (i32 timm:$rc)))))), 6926 (!cast<I>(Prefix#"213"#Suffix#"Zrb_Int") 6927 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), 6928 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>; 6929 6930 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 6931 (RndOp _.FRC:$src2, _.FRC:$src3, 6932 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), 6933 (i32 timm:$rc)))))), 6934 (!cast<I>(Prefix#"231"#Suffix#"Zrb_Int") 6935 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), 6936 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>; 6937 6938 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 6939 (X86selects_mask VK1WM:$mask, 6940 (RndOp _.FRC:$src2, 6941 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), 6942 _.FRC:$src3, (i32 timm:$rc)), 6943 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))), 6944 (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intk") 6945 VR128X:$src1, VK1WM:$mask, 6946 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), 6947 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>; 6948 6949 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 6950 (X86selects_mask VK1WM:$mask, 6951 (RndOp _.FRC:$src2, _.FRC:$src3, 6952 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), 6953 (i32 timm:$rc)), 6954 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))), 6955 (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intk") 6956 VR128X:$src1, VK1WM:$mask, 6957 (_.VT 
(COPY_TO_REGCLASS _.FRC:$src2, VR128X)), 6958 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>; 6959 6960 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 6961 (X86selects_mask VK1WM:$mask, 6962 (RndOp _.FRC:$src2, 6963 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), 6964 _.FRC:$src3, (i32 timm:$rc)), 6965 (_.EltVT ZeroFP)))))), 6966 (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intkz") 6967 VR128X:$src1, VK1WM:$mask, 6968 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), 6969 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>; 6970 6971 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 6972 (X86selects_mask VK1WM:$mask, 6973 (RndOp _.FRC:$src2, _.FRC:$src3, 6974 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), 6975 (i32 timm:$rc)), 6976 (_.EltVT ZeroFP)))))), 6977 (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intkz") 6978 VR128X:$src1, VK1WM:$mask, 6979 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), 6980 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>; 6981 } 6982} 6983 6984defm : avx512_scalar_fma_patterns<X86any_Fmadd, X86Fmadd, X86FmaddRnd, "VFMADD", 6985 "SS", X86Movss, v4f32x_info, fp32imm0>; 6986defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB", 6987 "SS", X86Movss, v4f32x_info, fp32imm0>; 6988defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD", 6989 "SS", X86Movss, v4f32x_info, fp32imm0>; 6990defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB", 6991 "SS", X86Movss, v4f32x_info, fp32imm0>; 6992 6993defm : avx512_scalar_fma_patterns<X86any_Fmadd, X86Fmadd, X86FmaddRnd, "VFMADD", 6994 "SD", X86Movsd, v2f64x_info, fp64imm0>; 6995defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB", 6996 "SD", X86Movsd, v2f64x_info, fp64imm0>; 6997defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD", 6998 "SD", X86Movsd, v2f64x_info, fp64imm0>; 6999defm : 
avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;

//===----------------------------------------------------------------------===//
// AVX-512  Packed Multiply of Unsigned 52-bit Integers and Add the Low 52-bit IFMA
//===----------------------------------------------------------------------===//
let Constraints = "$src1 = $dst" in {
// Register, memory, and broadcast-memory forms of a VPMADD52 instruction for
// one vector width.
multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  // NOTE: The SDNode has the multiply operands first with the add last.
  // This enables commuted load patterns to be autogenerated by tablegen.
  let ExeDomain = _.ExeDomain in {
  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
          AVX512FMA3Base, Sched<[sched]>;

  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
          AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
            (ins _.RC:$src2, _.ScalarMemOp:$src3),
            OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
            !strconcat("$src2, ${src3}", _.BroadcastStr ),
            (OpNode _.RC:$src2,
                    (_.VT (_.BroadcastLdFrag addr:$src3)),
                    _.RC:$src1)>,
            AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
} // Constraints = "$src1 = $dst"

// Instantiate VPMADD52 at 512-bit (HasIFMA) and at 256/128-bit (additionally
// requiring HasVLX).
multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
  let Predicates = [HasIFMA] in {
    defm Z      : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
                  EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, HasIFMA] in {
    defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
                EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
                EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}

defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l,
                                         SchedWriteVecIMul, avx512vl_i64_info>,
                                         VEX_W;
defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h,
                                         SchedWriteVecIMul, avx512vl_i64_info>,
                                         VEX_W;

//===----------------------------------------------------------------------===//
// AVX-512  Scalar convert from sign integer to float/double
//===----------------------------------------------------------------------===//

// Register/memory forms of a scalar int->fp convert: codegen-only FRC forms
// plus the _Int forms (used by intrinsics) with an assembler alias.
multiclass avx512_vcvtsi<bits<8> opc, SDPatternOperator OpNode, X86FoldableSchedWrite sched,
                    RegisterClass SrcRC, X86VectorVTInfo DstVT,
                    X86MemOperand x86memop, PatFrag ld_frag, string asm,
                    string mem, list<Register> _Uses = [MXCSR],
                    bit _mayRaiseFPException = 1> {
let ExeDomain = DstVT.ExeDomain, Uses = _Uses,
    mayRaiseFPException = _mayRaiseFPException in {
  let hasSideEffects = 0, isCodeGenOnly = 1 in {
    def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
              (ins DstVT.FRC:$src1, SrcRC:$src),
              !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
              EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
    let mayLoad = 1 in
      def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
              (ins DstVT.FRC:$src1, x86memop:$src),
              asm#"{"#mem#"}\t{$src, $src1, $dst|$dst, $src1, $src}", []>,
              EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  } // hasSideEffects = 0
  def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
                 (ins DstVT.RC:$src1, SrcRC:$src2),
                 !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 [(set DstVT.RC:$dst,
                       (OpNode (DstVT.VT DstVT.RC:$src1), SrcRC:$src2))]>,
                 EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;

  def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
                  (ins DstVT.RC:$src1, x86memop:$src2),
                  asm#"{"#mem#"}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  [(set DstVT.RC:$dst,
                        (OpNode (DstVT.VT DstVT.RC:$src1),
                                (ld_frag addr:$src2)))]>,
                  EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
  def : InstAlias<"v"#asm#mem#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  (!cast<Instruction>(NAME#"rr_Int") DstVT.RC:$dst,
                   DstVT.RC:$src1, SrcRC:$src2), 0, "att">;
}

// Register form with static rounding control (EVEX_RC) and its alias.
multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode,
                               X86FoldableSchedWrite sched, RegisterClass SrcRC,
                               X86VectorVTInfo DstVT, string asm,
                               string mem> {
  let ExeDomain = DstVT.ExeDomain, Uses = [MXCSR] in
  def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
              (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
              !strconcat(asm,
                         "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"),
              [(set DstVT.RC:$dst,
                    (OpNode (DstVT.VT DstVT.RC:$src1),
                             SrcRC:$src2,
                             (i32 timm:$rc)))]>,
              EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
  def : InstAlias<"v"#asm#mem#"\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}",
                  (!cast<Instruction>(NAME#"rrb_Int") DstVT.RC:$dst,
                   DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc), 0, "att">;
}

// Combines the plain and rounding-control variants under one name.
multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode, SDNode OpNodeRnd,
                                X86FoldableSchedWrite sched,
                                RegisterClass SrcRC, X86VectorVTInfo DstVT,
                                X86MemOperand x86memop, PatFrag ld_frag,
                                string asm, string mem> {
  defm NAME : avx512_vcvtsi_round<opc, OpNodeRnd, sched, SrcRC, DstVT, asm, mem>,
              avx512_vcvtsi<opc, OpNode, sched, SrcRC, DstVT, x86memop,
                            ld_frag, asm, mem>, VEX_LIG;
}

let Predicates = [HasAVX512] in {
defm VCVTSI2SSZ  : avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
                                        WriteCvtI2SS, GR32,
                                        v4f32x_info, i32mem, loadi32, "cvtsi2ss", "l">,
                                        XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
                                        WriteCvtI2SS, GR64,
                                        v4f32x_info, i64mem, loadi64, "cvtsi2ss", "q">,
                                        XS, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTSI2SDZ  : avx512_vcvtsi<0x2A, null_frag, WriteCvtI2SD, GR32,
                                 v2f64x_info, i32mem, loadi32, "cvtsi2sd", "l", [], 0>,
                                 XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
                                        WriteCvtI2SD, GR64,
                                        v2f64x_info, i64mem, loadi64, "cvtsi2sd", "q">,
                                        XD, VEX_W, EVEX_CD8<64, CD8VT1>;

def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
              (VCVTSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
              (VCVTSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;

def : Pat<(f32 (any_sint_to_fp (loadi32 addr:$src))),
          (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f32 (any_sint_to_fp (loadi64 addr:$src))),
          (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (any_sint_to_fp (loadi32 addr:$src))),
          (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (any_sint_to_fp (loadi64 addr:$src))),
          (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;

def : Pat<(f32 (any_sint_to_fp GR32:$src)),
          (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f32 (any_sint_to_fp GR64:$src)),
          (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
def : Pat<(f64 (any_sint_to_fp GR32:$src)),
          (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f64 (any_sint_to_fp GR64:$src)),
          (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;

defm VCVTUSI2SSZ   : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
                                          WriteCvtI2SS, GR32,
                                          v4f32x_info, i32mem, loadi32,
                                          "cvtusi2ss", "l">, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
                                          WriteCvtI2SS, GR64,
                                          v4f32x_info, i64mem, loadi64, "cvtusi2ss", "q">,
                                          XS, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTUSI2SDZ   : avx512_vcvtsi<0x7B, null_frag, WriteCvtI2SD, GR32, v2f64x_info,
                                   i32mem, loadi32, "cvtusi2sd", "l", [], 0>,
                                   XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
                                          WriteCvtI2SD, GR64,
                                          v2f64x_info, i64mem, loadi64, "cvtusi2sd", "q">,
                                          XD, VEX_W, EVEX_CD8<64, CD8VT1>;

def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
              (VCVTUSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
              (VCVTUSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;

def : Pat<(f32 (any_uint_to_fp (loadi32 addr:$src))),
          (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f32 (any_uint_to_fp (loadi64 addr:$src))),
          (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (any_uint_to_fp (loadi32 addr:$src))),
          (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (any_uint_to_fp (loadi64 addr:$src))),
          (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;

def : Pat<(f32 (any_uint_to_fp GR32:$src)),
          (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f32 (any_uint_to_fp GR64:$src)),
          (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
def : Pat<(f64 (any_uint_to_fp GR32:$src)),
          (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f64 (any_uint_to_fp GR64:$src)),
          (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
}

//===----------------------------------------------------------------------===//
// AVX-512  Scalar convert from float/double to integer
//===----------------------------------------------------------------------===//

// Scalar fp->int convert (_Int forms): plain, static-rounding (EVEX_RC), and
// folded-load variants, plus AT&T assembler aliases.
multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
                                  X86VectorVTInfo DstVT, SDNode OpNode,
                                  SDNode OpNodeRnd,
                                  X86FoldableSchedWrite sched, string asm,
                                  string aliasStr> {
  let Predicates = [HasAVX512], ExeDomain = SrcVT.ExeDomain in {
    def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
                !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src)))]>,
                EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
    let Uses = [MXCSR] in
    def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc),
                 !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
                 [(set DstVT.RC:$dst, (OpNodeRnd (SrcVT.VT SrcVT.RC:$src),(i32 timm:$rc)))]>,
                 EVEX, VEX_LIG, EVEX_B, EVEX_RC,
                 Sched<[sched]>;
    def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src),
                !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                [(set DstVT.RC:$dst, (OpNode
                      (SrcVT.ScalarIntMemFrags addr:$src)))]>,
                EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  } // Predicates = [HasAVX512]

  def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
          (!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0, "att">;
  def : InstAlias<"v" # asm # aliasStr # "\t{$rc, $src, $dst|$dst, $src, $rc}",
          (!cast<Instruction>(NAME # "rrb_Int") DstVT.RC:$dst, SrcVT.RC:$src, AVX512RC:$rc), 0, "att">;
  def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
          (!cast<Instruction>(NAME # "rm_Int") DstVT.RC:$dst,
                                          SrcVT.IntScalarMemOp:$src), 0, "att">;
}

// Convert float/double to signed/unsigned int 32/64
defm VCVTSS2SIZ:    avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info,X86cvts2si,
                                   X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{l}">,
                                   XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info, X86cvts2si,
                                   X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{q}">,
                                   XS, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2USIZ: avx512_cvt_s_int_round<0x79, f32x_info, i32x_info, X86cvts2usi,
                                   X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{l}">,
                                   XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, f32x_info, i64x_info, X86cvts2usi,
                                   X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{q}">,
                                   XS, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTSD2SIZ:    avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info, X86cvts2si,
                                   X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{l}">,
                                   XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info, X86cvts2si,
                                   X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{q}">,
                                   XD, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2USIZ:   avx512_cvt_s_int_round<0x79, f64x_info, i32x_info, X86cvts2usi,
                                   X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{l}">,
                                   XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info, X86cvts2usi,
                                   X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{q}">,
                                   XD, VEX_W, EVEX_CD8<64, CD8VT1>;

// Codegen-only scalar fp->int forms operating on FRC registers (used below
// for the lrint/llrint patterns).
multiclass avx512_cvt_s<bits<8> opc, string asm, X86VectorVTInfo SrcVT,
                        X86VectorVTInfo DstVT, SDNode OpNode,
                        X86FoldableSchedWrite sched,
                        string aliasStr> {
  let Predicates = [HasAVX512], ExeDomain = SrcVT.ExeDomain in {
    let isCodeGenOnly = 1 in {
    def rr : AVX512<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.FRC:$src),
                !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                [(set DstVT.RC:$dst, (OpNode SrcVT.FRC:$src))]>,
                EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
    def rm : AVX512<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.ScalarMemOp:$src),
                !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                [(set DstVT.RC:$dst, (OpNode (SrcVT.ScalarLdFrag addr:$src)))]>,
                EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
    }
  } // Predicates = [HasAVX512]
}

defm VCVTSS2SIZ: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i32x_info,
                       lrint, WriteCvtSS2I,
                       "{l}">, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2SI64Z: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i64x_info,
                       llrint, WriteCvtSS2I,
                       "{q}">, VEX_W, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSD2SIZ: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i32x_info,
                       lrint, WriteCvtSD2I,
                       "{l}">, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2SI64Z: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i64x_info,
                       llrint, WriteCvtSD2I,
                       "{q}">, VEX_W, XD, EVEX_CD8<64, CD8VT1>;

let Predicates = [HasAVX512] in {
  def : Pat<(i64 (lrint FR32:$src)), (VCVTSS2SI64Zrr FR32:$src)>;
  def : Pat<(i64 (lrint (loadf32 addr:$src))), (VCVTSS2SI64Zrm addr:$src)>;

  def : Pat<(i64 (lrint FR64:$src)), (VCVTSD2SI64Zrr FR64:$src)>;
  def : Pat<(i64 (lrint (loadf64 addr:$src))), (VCVTSD2SI64Zrm addr:$src)>;
}

// Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang
// which produce unnecessary vmovs{s,d} instructions
let Predicates = [HasAVX512] in {
def : Pat<(v4f32 (X86Movss
                  (v4f32 VR128X:$dst),
                  (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR64:$src)))))),
          (VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v4f32 (X86Movss
                  (v4f32 VR128X:$dst),
                  (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi64 addr:$src))))))),
          (VCVTSI642SSZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v4f32 (X86Movss
                  (v4f32 VR128X:$dst),
                  (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR32:$src)))))),
          (VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v4f32 (X86Movss
                  (v4f32 VR128X:$dst),
                  (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi32 addr:$src))))))),
          (VCVTSI2SSZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                  (v2f64 VR128X:$dst),
                  (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR64:$src)))))),
          (VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v2f64 (X86Movsd
                  (v2f64 VR128X:$dst),
                  (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi64 addr:$src))))))),
          (VCVTSI642SDZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                  (v2f64 VR128X:$dst),
                  (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR32:$src)))))),
          (VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v2f64 (X86Movsd
                  (v2f64 VR128X:$dst),
                  (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi32 addr:$src))))))),
          (VCVTSI2SDZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v4f32 (X86Movss
                  (v4f32 VR128X:$dst),
                  (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR64:$src)))))),
          (VCVTUSI642SSZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v4f32 (X86Movss
                  (v4f32 VR128X:$dst),
                  (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi64 addr:$src))))))),
          (VCVTUSI642SSZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v4f32 (X86Movss
                  (v4f32 VR128X:$dst),
                  (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR32:$src)))))),
          (VCVTUSI2SSZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v4f32 (X86Movss
                  (v4f32 VR128X:$dst),
                  (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi32 addr:$src))))))),
          (VCVTUSI2SSZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                  (v2f64 VR128X:$dst),
                  (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR64:$src)))))),
          (VCVTUSI642SDZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v2f64 (X86Movsd
                  (v2f64 VR128X:$dst),
                  (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi64 addr:$src))))))),
          (VCVTUSI642SDZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                  (v2f64 VR128X:$dst),
                  (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR32:$src)))))),
          (VCVTUSI2SDZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v2f64 (X86Movsd
                  (v2f64 VR128X:$dst),
                  (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi32 addr:$src))))))),
          (VCVTUSI2SDZrm_Int VR128X:$dst, addr:$src)>;
} // Predicates = [HasAVX512]

// Convert float/double to signed/unsigned int 32/64 with truncation
multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
                            X86VectorVTInfo _DstRC, SDNode OpNode,
                            SDNode OpNodeInt, SDNode OpNodeSAE,
                            X86FoldableSchedWrite sched, string aliasStr>{
let Predicates = [HasAVX512], ExeDomain = _SrcRC.ExeDomain in {
  let isCodeGenOnly = 1 in {
  def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
              [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>,
              EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
  def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
              [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>,
              EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  }

  def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
            !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
            [(set _DstRC.RC:$dst, (OpNodeInt (_SrcRC.VT _SrcRC.RC:$src)))]>,
            EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
  let Uses = [MXCSR] in
  def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
            !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
            [(set _DstRC.RC:$dst, (OpNodeSAE (_SrcRC.VT _SrcRC.RC:$src)))]>,
                                  EVEX, VEX_LIG, EVEX_B, Sched<[sched]>;
  def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
              (ins _SrcRC.IntScalarMemOp:$src),
              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
              [(set _DstRC.RC:$dst,
                (OpNodeInt (_SrcRC.ScalarIntMemFrags addr:$src)))]>,
              EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
} //HasAVX512

  def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
          (!cast<Instruction>(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
  def : InstAlias<asm # aliasStr # "\t{{sae}, $src, $dst|$dst, $src, {sae}}",
          (!cast<Instruction>(NAME # "rrb_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
  def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
          (!cast<Instruction>(NAME # "rm_Int") _DstRC.RC:$dst,
                                          _SrcRC.IntScalarMemOp:$src), 0, "att">;
}

defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
                        "{l}">, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
                        "{q}">, VEX_W, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
                        "{l}">, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
                        "{q}">, VEX_W, XD, EVEX_CD8<64, CD8VT1>;

defm VCVTTSS2USIZ: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i32x_info,
                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
                        "{l}">, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2USI64Z: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i64x_info,
                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
                        "{q}">, XS,VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTTSD2USIZ: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i32x_info,
                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
                        "{l}">, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i64x_info,
                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
                        "{q}">, XD, VEX_W, EVEX_CD8<64, CD8VT1>;

//===----------------------------------------------------------------------===//
// AVX-512  Convert from float to double and back
//===----------------------------------------------------------------------===//

// Scalar fp<->fp conversion: masked _Int forms plus codegen-only FRC forms.
let Uses = [MXCSR], mayRaiseFPException = 1 in
multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                X86VectorVTInfo _Src, SDNode OpNode,
                                X86FoldableSchedWrite sched> {
  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (_.VT (OpNode (_.VT _.RC:$src1),
                                       (_Src.VT _Src.RC:$src2)))>,
                         EVEX_4V, VEX_LIG, Sched<[sched]>;
  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (_.VT (OpNode (_.VT _.RC:$src1),
                                  (_Src.ScalarIntMemFrags addr:$src2)))>,
                         EVEX_4V, VEX_LIG,
                         Sched<[sched.Folded, sched.ReadAfterFold]>;

  let isCodeGenOnly = 1, hasSideEffects = 0 in {
    def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
               (ins _.FRC:$src1, _Src.FRC:$src2),
               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
               EVEX_4V, VEX_LIG, Sched<[sched]>;
    let mayLoad = 1 in
    def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
               (ins _.FRC:$src1, _Src.ScalarMemOp:$src2),
               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
               EVEX_4V, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Scalar Conversion with SAE - suppress all exceptions
multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                    X86VectorVTInfo _Src, SDNode OpNodeSAE,
                                    X86FoldableSchedWrite sched> {
  let Uses = [MXCSR] in
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
                        "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                        (_.VT (OpNodeSAE (_.VT _.RC:$src1),
                                         (_Src.VT _Src.RC:$src2)))>,
                        EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
}

// Scalar Conversion with rounding control (RC)
multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                   X86VectorVTInfo _Src, SDNode OpNodeRnd,
                                   X86FoldableSchedWrite sched> {
  let Uses = [MXCSR] in
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
                        "$rc, $src2, $src1", "$src1, $src2, $rc",
                        (_.VT (OpNodeRnd (_.VT _.RC:$src1),
                                         (_Src.VT _Src.RC:$src2), (i32 timm:$rc)))>,
                        EVEX_4V, VEX_LIG, Sched<[sched]>,
                        EVEX_B, EVEX_RC;
}
// f64->f32: narrowing conversion takes rounding control.
multiclass avx512_cvt_fp_scalar_sd2ss<bits<8> opc, string OpcodeStr,
                                      SDNode OpNode, SDNode OpNodeRnd,
                                      X86FoldableSchedWrite sched,
                                      X86VectorVTInfo _src, X86VectorVTInfo _dst> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
             avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
                                     OpNodeRnd, sched>, VEX_W, EVEX_CD8<64, CD8VT1>, XD;
  }
}

// f32->f64: widening conversion is exact, so it takes SAE instead of RC.
multiclass avx512_cvt_fp_scalar_ss2sd<bits<8> opc, string OpcodeStr,
                                      SDNode OpNode, SDNode OpNodeSAE,
                                      X86FoldableSchedWrite sched,
                                      X86VectorVTInfo _src, X86VectorVTInfo _dst> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
             avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeSAE, sched>,
             EVEX_CD8<32, CD8VT1>, XS;
  }
}
defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss", X86frounds,
                                         X86froundsRnd, WriteCvtSD2SS, f64x_info,
                                         f32x_info>;
defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd", X86fpexts,
                                          X86fpextsSAE, WriteCvtSS2SD, f32x_info,
                                          f64x_info>;

def : Pat<(f64 (any_fpextend FR32X:$src)),
          (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>,
          Requires<[HasAVX512]>;
def : Pat<(f64 (any_fpextend (loadf32 addr:$src))),
          (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
          Requires<[HasAVX512, OptForSize]>;

def : Pat<(f32 (any_fpround FR64X:$src)),
          (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
           Requires<[HasAVX512]>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector
                     (f32 (any_fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
          (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
          Requires<[HasAVX512]>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector
                     (f64 (any_fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
          (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
          Requires<[HasAVX512]>;

//===----------------------------------------------------------------------===//
// AVX-512  Vector convert from signed/unsigned integer to float/double
//          and from float/double to signed/unsigned integer
//===----------------------------------------------------------------------===//

// Packed convert: register, memory, and broadcast forms, each with unmasked,
// merge-masked (vselect_mask with $src0), and zero-masked patterns.
multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                          X86VectorVTInfo _Src, SDNode OpNode, SDNode MaskOpNode,
                          X86FoldableSchedWrite sched,
                          string Broadcast = _.BroadcastStr,
                          string Alias = "", X86MemOperand MemOp = _Src.MemOp,
                          RegisterClass MaskRC = _.KRCWM,
                          dag LdDAG = (_.VT (OpNode (_Src.VT (_Src.LdFrag addr:$src)))),
                          dag MaskLdDAG = (_.VT (MaskOpNode (_Src.VT (_Src.LdFrag addr:$src))))> {
let Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm rr : AVX512_maskable_cvt<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _Src.RC:$src),
                         (ins _.RC:$src0, MaskRC:$mask, _Src.RC:$src),
                         (ins MaskRC:$mask, _Src.RC:$src),
                         OpcodeStr, "$src", "$src",
                         (_.VT (OpNode (_Src.VT _Src.RC:$src))),
                         (vselect_mask MaskRC:$mask,
                                       (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))),
                                       _.RC:$src0),
                         (vselect_mask MaskRC:$mask,
                                       (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))),
                                       _.ImmAllZerosV)>,
                         EVEX, Sched<[sched]>;

  defm rm : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins MemOp:$src),
                         (ins _.RC:$src0, MaskRC:$mask, MemOp:$src),
                         (ins MaskRC:$mask, MemOp:$src),
                         OpcodeStr#Alias, "$src", "$src",
                         LdDAG,
                         (vselect_mask MaskRC:$mask, MaskLdDAG, _.RC:$src0),
                         (vselect_mask MaskRC:$mask, MaskLdDAG, _.ImmAllZerosV)>,
                         EVEX, Sched<[sched.Folded]>;

  defm rmb : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _Src.ScalarMemOp:$src),
                         (ins _.RC:$src0, MaskRC:$mask, _Src.ScalarMemOp:$src),
                         (ins MaskRC:$mask, _Src.ScalarMemOp:$src),
                         OpcodeStr,
                         "${src}"#Broadcast, "${src}"#Broadcast,
                         (_.VT (OpNode (_Src.VT
                                        (_Src.BroadcastLdFrag addr:$src))
                                       )),
                         (vselect_mask MaskRC:$mask,
                                       (_.VT
                                        (MaskOpNode
                                         (_Src.VT
                                          (_Src.BroadcastLdFrag addr:$src)))),
                                       _.RC:$src0),
                         (vselect_mask MaskRC:$mask,
                                       (_.VT
                                        (MaskOpNode
                                         (_Src.VT
                                          (_Src.BroadcastLdFrag addr:$src)))),
                                       _.ImmAllZerosV)>,
                         EVEX, EVEX_B, Sched<[sched.Folded]>;
  }
}
// Conversion with SAE - suppress all exceptions
multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                              X86VectorVTInfo _Src, SDNode OpNodeSAE,
                              X86FoldableSchedWrite sched> {
  let Uses = [MXCSR] in
  defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _Src.RC:$src), OpcodeStr,
                        "{sae}, $src", "$src, {sae}",
                        (_.VT (OpNodeSAE (_Src.VT _Src.RC:$src)))>,
                        EVEX, EVEX_B, Sched<[sched]>;
}

// Conversion with rounding control (RC)
multiclass
avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, 7651 X86VectorVTInfo _Src, SDNode OpNodeRnd, 7652 X86FoldableSchedWrite sched> { 7653 let Uses = [MXCSR] in 7654 defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 7655 (ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr, 7656 "$rc, $src", "$src, $rc", 7657 (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 timm:$rc)))>, 7658 EVEX, EVEX_B, EVEX_RC, Sched<[sched]>; 7659} 7660 7661// Similar to avx512_vcvt_fp, but uses an extload for the memory form. 7662multiclass avx512_vcvt_fpextend<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, 7663 X86VectorVTInfo _Src, SDNode OpNode, 7664 SDNode MaskOpNode, 7665 X86FoldableSchedWrite sched, 7666 string Broadcast = _.BroadcastStr, 7667 string Alias = "", X86MemOperand MemOp = _Src.MemOp, 7668 RegisterClass MaskRC = _.KRCWM> 7669 : avx512_vcvt_fp<opc, OpcodeStr, _, _Src, OpNode, MaskOpNode, sched, Broadcast, 7670 Alias, MemOp, MaskRC, 7671 (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src)), 7672 (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src))>; 7673 7674// Extend Float to Double 7675multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr, 7676 X86SchedWriteWidths sched> { 7677 let Predicates = [HasAVX512] in { 7678 defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, v8f64_info, v8f32x_info, 7679 any_fpextend, fpextend, sched.ZMM>, 7680 avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f32x_info, 7681 X86vfpextSAE, sched.ZMM>, EVEX_V512; 7682 } 7683 let Predicates = [HasVLX] in { 7684 defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, v2f64x_info, v4f32x_info, 7685 X86any_vfpext, X86vfpext, sched.XMM, "{1to2}", 7686 "", f64mem>, EVEX_V128; 7687 defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v4f32x_info, 7688 any_fpextend, fpextend, sched.YMM>, EVEX_V256; 7689 } 7690} 7691 7692// Truncate Double to Float 7693multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> { 7694 let Predicates = [HasAVX512] in { 7695 
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info, 7696 X86any_vfpround, X86vfpround, sched.ZMM>, 7697 avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8f64_info, 7698 X86vfproundRnd, sched.ZMM>, EVEX_V512; 7699 } 7700 let Predicates = [HasVLX] in { 7701 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2f64x_info, 7702 null_frag, null_frag, sched.XMM, "{1to2}", "{x}", 7703 f128mem, VK2WM>, EVEX_V128; 7704 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info, 7705 X86any_vfpround, X86vfpround, 7706 sched.YMM, "{1to4}", "{y}">, EVEX_V256; 7707 } 7708 7709 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}", 7710 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">; 7711 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", 7712 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst, 7713 VK2WM:$mask, VR128X:$src), 0, "att">; 7714 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|" 7715 "$dst {${mask}} {z}, $src}", 7716 (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst, 7717 VK2WM:$mask, VR128X:$src), 0, "att">; 7718 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}", 7719 (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, f64mem:$src), 0, "att">; 7720 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|" 7721 "$dst {${mask}}, ${src}{1to2}}", 7722 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst, 7723 VK2WM:$mask, f64mem:$src), 0, "att">; 7724 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|" 7725 "$dst {${mask}} {z}, ${src}{1to2}}", 7726 (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst, 7727 VK2WM:$mask, f64mem:$src), 0, "att">; 7728 7729 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}", 7730 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">; 7731 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", 7732 (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst, 7733 VK4WM:$mask, 
VR256X:$src), 0, "att">; 7734 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|" 7735 "$dst {${mask}} {z}, $src}", 7736 (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst, 7737 VK4WM:$mask, VR256X:$src), 0, "att">; 7738 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}", 7739 (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, f64mem:$src), 0, "att">; 7740 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|" 7741 "$dst {${mask}}, ${src}{1to4}}", 7742 (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst, 7743 VK4WM:$mask, f64mem:$src), 0, "att">; 7744 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|" 7745 "$dst {${mask}} {z}, ${src}{1to4}}", 7746 (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst, 7747 VK4WM:$mask, f64mem:$src), 0, "att">; 7748} 7749 7750defm VCVTPD2PS : avx512_cvtpd2ps<0x5A, "vcvtpd2ps", SchedWriteCvtPD2PS>, 7751 VEX_W, PD, EVEX_CD8<64, CD8VF>; 7752defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd", SchedWriteCvtPS2PD>, 7753 PS, EVEX_CD8<32, CD8VH>; 7754 7755let Predicates = [HasVLX] in { 7756 // Special patterns to allow use of X86vmfpround for masking. Instruction 7757 // patterns have been disabled with null_frag. 
7758 def : Pat<(X86any_vfpround (v2f64 VR128X:$src)), 7759 (VCVTPD2PSZ128rr VR128X:$src)>; 7760 def : Pat<(X86vmfpround (v2f64 VR128X:$src), (v4f32 VR128X:$src0), 7761 VK2WM:$mask), 7762 (VCVTPD2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>; 7763 def : Pat<(X86vmfpround (v2f64 VR128X:$src), v4f32x_info.ImmAllZerosV, 7764 VK2WM:$mask), 7765 (VCVTPD2PSZ128rrkz VK2WM:$mask, VR128X:$src)>; 7766 7767 def : Pat<(X86any_vfpround (loadv2f64 addr:$src)), 7768 (VCVTPD2PSZ128rm addr:$src)>; 7769 def : Pat<(X86vmfpround (loadv2f64 addr:$src), (v4f32 VR128X:$src0), 7770 VK2WM:$mask), 7771 (VCVTPD2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 7772 def : Pat<(X86vmfpround (loadv2f64 addr:$src), v4f32x_info.ImmAllZerosV, 7773 VK2WM:$mask), 7774 (VCVTPD2PSZ128rmkz VK2WM:$mask, addr:$src)>; 7775 7776 def : Pat<(X86any_vfpround (v2f64 (X86VBroadcastld64 addr:$src))), 7777 (VCVTPD2PSZ128rmb addr:$src)>; 7778 def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)), 7779 (v4f32 VR128X:$src0), VK2WM:$mask), 7780 (VCVTPD2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>; 7781 def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)), 7782 v4f32x_info.ImmAllZerosV, VK2WM:$mask), 7783 (VCVTPD2PSZ128rmbkz VK2WM:$mask, addr:$src)>; 7784} 7785 7786// Convert Signed/Unsigned Doubleword to Double 7787let Uses = []<Register>, mayRaiseFPException = 0 in 7788multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode, 7789 SDNode MaskOpNode, SDNode OpNode128, 7790 SDNode MaskOpNode128, 7791 X86SchedWriteWidths sched> { 7792 // No rounding in this op 7793 let Predicates = [HasAVX512] in 7794 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode, 7795 MaskOpNode, sched.ZMM>, EVEX_V512; 7796 7797 let Predicates = [HasVLX] in { 7798 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info, 7799 OpNode128, MaskOpNode128, sched.XMM, "{1to2}", 7800 "", i64mem, VK2WM, 7801 (v2f64 (OpNode128 (bc_v4i32 7802 (v2i64 7803 (scalar_to_vector (loadi64 
addr:$src)))))), 7804 (v2f64 (MaskOpNode128 (bc_v4i32 7805 (v2i64 7806 (scalar_to_vector (loadi64 addr:$src))))))>, 7807 EVEX_V128; 7808 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode, 7809 MaskOpNode, sched.YMM>, EVEX_V256; 7810 } 7811} 7812 7813// Convert Signed/Unsigned Doubleword to Float 7814multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode, 7815 SDNode MaskOpNode, SDNode OpNodeRnd, 7816 X86SchedWriteWidths sched> { 7817 let Predicates = [HasAVX512] in 7818 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode, 7819 MaskOpNode, sched.ZMM>, 7820 avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info, 7821 OpNodeRnd, sched.ZMM>, EVEX_V512; 7822 7823 let Predicates = [HasVLX] in { 7824 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode, 7825 MaskOpNode, sched.XMM>, EVEX_V128; 7826 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode, 7827 MaskOpNode, sched.YMM>, EVEX_V256; 7828 } 7829} 7830 7831// Convert Float to Signed/Unsigned Doubleword with truncation 7832multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode, 7833 SDNode MaskOpNode, 7834 SDNode OpNodeSAE, X86SchedWriteWidths sched> { 7835 let Predicates = [HasAVX512] in { 7836 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode, 7837 MaskOpNode, sched.ZMM>, 7838 avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info, 7839 OpNodeSAE, sched.ZMM>, EVEX_V512; 7840 } 7841 let Predicates = [HasVLX] in { 7842 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode, 7843 MaskOpNode, sched.XMM>, EVEX_V128; 7844 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode, 7845 MaskOpNode, sched.YMM>, EVEX_V256; 7846 } 7847} 7848 7849// Convert Float to Signed/Unsigned Doubleword 7850multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode, 7851 SDNode MaskOpNode, SDNode OpNodeRnd, 7852 
X86SchedWriteWidths sched> { 7853 let Predicates = [HasAVX512] in { 7854 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode, 7855 MaskOpNode, sched.ZMM>, 7856 avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info, 7857 OpNodeRnd, sched.ZMM>, EVEX_V512; 7858 } 7859 let Predicates = [HasVLX] in { 7860 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode, 7861 MaskOpNode, sched.XMM>, EVEX_V128; 7862 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode, 7863 MaskOpNode, sched.YMM>, EVEX_V256; 7864 } 7865} 7866 7867// Convert Double to Signed/Unsigned Doubleword with truncation 7868multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode, 7869 SDNode MaskOpNode, SDNode OpNodeSAE, 7870 X86SchedWriteWidths sched> { 7871 let Predicates = [HasAVX512] in { 7872 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode, 7873 MaskOpNode, sched.ZMM>, 7874 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info, 7875 OpNodeSAE, sched.ZMM>, EVEX_V512; 7876 } 7877 let Predicates = [HasVLX] in { 7878 // we need "x"/"y" suffixes in order to distinguish between 128 and 256 7879 // memory forms of these instructions in Asm Parser. They have the same 7880 // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly 7881 // due to the same reason. 
7882 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info, 7883 null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem, 7884 VK2WM>, EVEX_V128; 7885 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode, 7886 MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256; 7887 } 7888 7889 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}", 7890 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, 7891 VR128X:$src), 0, "att">; 7892 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", 7893 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst, 7894 VK2WM:$mask, VR128X:$src), 0, "att">; 7895 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}", 7896 (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst, 7897 VK2WM:$mask, VR128X:$src), 0, "att">; 7898 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}", 7899 (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, 7900 f64mem:$src), 0, "att">; 7901 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|" 7902 "$dst {${mask}}, ${src}{1to2}}", 7903 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst, 7904 VK2WM:$mask, f64mem:$src), 0, "att">; 7905 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|" 7906 "$dst {${mask}} {z}, ${src}{1to2}}", 7907 (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst, 7908 VK2WM:$mask, f64mem:$src), 0, "att">; 7909 7910 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}", 7911 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, 7912 VR256X:$src), 0, "att">; 7913 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", 7914 (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst, 7915 VK4WM:$mask, VR256X:$src), 0, "att">; 7916 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}", 7917 (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst, 7918 VK4WM:$mask, VR256X:$src), 0, "att">; 7919 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, 
${src}{1to4}}", 7920 (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, 7921 f64mem:$src), 0, "att">; 7922 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|" 7923 "$dst {${mask}}, ${src}{1to4}}", 7924 (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst, 7925 VK4WM:$mask, f64mem:$src), 0, "att">; 7926 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|" 7927 "$dst {${mask}} {z}, ${src}{1to4}}", 7928 (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst, 7929 VK4WM:$mask, f64mem:$src), 0, "att">; 7930} 7931 7932// Convert Double to Signed/Unsigned Doubleword 7933multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode, 7934 SDNode MaskOpNode, SDNode OpNodeRnd, 7935 X86SchedWriteWidths sched> { 7936 let Predicates = [HasAVX512] in { 7937 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode, 7938 MaskOpNode, sched.ZMM>, 7939 avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info, 7940 OpNodeRnd, sched.ZMM>, EVEX_V512; 7941 } 7942 let Predicates = [HasVLX] in { 7943 // we need "x"/"y" suffixes in order to distinguish between 128 and 256 7944 // memory forms of these instructions in Asm Parser. They have the same 7945 // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly 7946 // due to the same reason.
7947 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info, 7948 null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem, 7949 VK2WM>, EVEX_V128; 7950 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode, 7951 MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256; 7952 } 7953 7954 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}", 7955 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">; 7956 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", 7957 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst, 7958 VK2WM:$mask, VR128X:$src), 0, "att">; 7959 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}", 7960 (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst, 7961 VK2WM:$mask, VR128X:$src), 0, "att">; 7962 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}", 7963 (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, 7964 f64mem:$src), 0, "att">; 7965 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|" 7966 "$dst {${mask}}, ${src}{1to2}}", 7967 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst, 7968 VK2WM:$mask, f64mem:$src), 0, "att">; 7969 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|" 7970 "$dst {${mask}} {z}, ${src}{1to2}}", 7971 (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst, 7972 VK2WM:$mask, f64mem:$src), 0, "att">; 7973 7974 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}", 7975 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">; 7976 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", 7977 (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst, 7978 VK4WM:$mask, VR256X:$src), 0, "att">; 7979 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}", 7980 (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst, 7981 VK4WM:$mask, VR256X:$src), 0, "att">; 7982 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}", 
7983 (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, 7984 f64mem:$src), 0, "att">; 7985 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|" 7986 "$dst {${mask}}, ${src}{1to4}}", 7987 (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst, 7988 VK4WM:$mask, f64mem:$src), 0, "att">; 7989 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|" 7990 "$dst {${mask}} {z}, ${src}{1to4}}", 7991 (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst, 7992 VK4WM:$mask, f64mem:$src), 0, "att">; 7993} 7994 7995// Convert Double to Signed/Unsigned Quadword 7996multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode, 7997 SDNode MaskOpNode, SDNode OpNodeRnd, 7998 X86SchedWriteWidths sched> { 7999 let Predicates = [HasDQI] in { 8000 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode, 8001 MaskOpNode, sched.ZMM>, 8002 avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info, 8003 OpNodeRnd, sched.ZMM>, EVEX_V512; 8004 } 8005 let Predicates = [HasDQI, HasVLX] in { 8006 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode, 8007 MaskOpNode, sched.XMM>, EVEX_V128; 8008 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode, 8009 MaskOpNode, sched.YMM>, EVEX_V256; 8010 } 8011} 8012 8013// Convert Double to Signed/Unsigned Quadword with truncation 8014multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode, 8015 SDNode MaskOpNode, SDNode OpNodeRnd, 8016 X86SchedWriteWidths sched> { 8017 let Predicates = [HasDQI] in { 8018 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode, 8019 MaskOpNode, sched.ZMM>, 8020 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info, 8021 OpNodeRnd, sched.ZMM>, EVEX_V512; 8022 } 8023 let Predicates = [HasDQI, HasVLX] in { 8024 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode, 8025 MaskOpNode, sched.XMM>, EVEX_V128; 8026 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
8027 MaskOpNode, sched.YMM>, EVEX_V256; 8028 } 8029} 8030 8031// Convert Signed/Unsigned Quadword to Double 8032multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode, 8033 SDNode MaskOpNode, SDNode OpNodeRnd, 8034 X86SchedWriteWidths sched> { 8035 let Predicates = [HasDQI] in { 8036 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode, 8037 MaskOpNode, sched.ZMM>, 8038 avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info, 8039 OpNodeRnd, sched.ZMM>, EVEX_V512; 8040 } 8041 let Predicates = [HasDQI, HasVLX] in { 8042 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode, 8043 MaskOpNode, sched.XMM>, EVEX_V128, NotEVEX2VEXConvertible; 8044 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode, 8045 MaskOpNode, sched.YMM>, EVEX_V256, NotEVEX2VEXConvertible; 8046 } 8047} 8048 8049// Convert Float to Signed/Unsigned Quadword 8050multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode, 8051 SDNode MaskOpNode, SDNode OpNodeRnd, 8052 X86SchedWriteWidths sched> { 8053 let Predicates = [HasDQI] in { 8054 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode, 8055 MaskOpNode, sched.ZMM>, 8056 avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info, 8057 OpNodeRnd, sched.ZMM>, EVEX_V512; 8058 } 8059 let Predicates = [HasDQI, HasVLX] in { 8060 // Explicitly specified broadcast string, since we take only 2 elements 8061 // from v4f32x_info source 8062 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode, 8063 MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM, 8064 (v2i64 (OpNode (bc_v4f32 8065 (v2f64 8066 (scalar_to_vector (loadf64 addr:$src)))))), 8067 (v2i64 (MaskOpNode (bc_v4f32 8068 (v2f64 8069 (scalar_to_vector (loadf64 addr:$src))))))>, 8070 EVEX_V128; 8071 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode, 8072 MaskOpNode, sched.YMM>, EVEX_V256; 8073 } 8074} 8075 8076// Convert Float to Signed/Unsigned
Quadword with truncation 8077multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode, 8078 SDNode MaskOpNode, SDNode OpNodeRnd, 8079 X86SchedWriteWidths sched> { 8080 let Predicates = [HasDQI] in { 8081 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode, 8082 MaskOpNode, sched.ZMM>, 8083 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info, 8084 OpNodeRnd, sched.ZMM>, EVEX_V512; 8085 } 8086 let Predicates = [HasDQI, HasVLX] in { 8087 // Explicitly specified broadcast string, since we take only 2 elements 8088 // from v4f32x_info source 8089 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode, 8090 MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM, 8091 (v2i64 (OpNode (bc_v4f32 8092 (v2f64 8093 (scalar_to_vector (loadf64 addr:$src)))))), 8094 (v2i64 (MaskOpNode (bc_v4f32 8095 (v2f64 8096 (scalar_to_vector (loadf64 addr:$src))))))>, 8097 EVEX_V128; 8098 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode, 8099 MaskOpNode, sched.YMM>, EVEX_V256; 8100 } 8101} 8102 8103// Convert Signed/Unsigned Quadword to Float 8104multiclass avx512_cvtqq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode, 8105 SDNode MaskOpNode, SDNode OpNodeRnd, 8106 X86SchedWriteWidths sched> { 8107 let Predicates = [HasDQI] in { 8108 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i64_info, OpNode, 8109 MaskOpNode, sched.ZMM>, 8110 avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8i64_info, 8111 OpNodeRnd, sched.ZMM>, EVEX_V512; 8112 } 8113 let Predicates = [HasDQI, HasVLX] in { 8114 // we need "x"/"y" suffixes in order to distinguish between 128 and 256 8115 // memory forms of these instructions in Asm Parser. They have the same 8116 // dest type - 'v4f32x_info'. We also specify the broadcast string explicitly 8117 // due to the same reason.
8118 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2i64x_info, null_frag, 8119 null_frag, sched.XMM, "{1to2}", "{x}", i128mem, VK2WM>, 8120 EVEX_V128, NotEVEX2VEXConvertible; 8121 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i64x_info, OpNode, 8122 MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256, 8123 NotEVEX2VEXConvertible; 8124 } 8125 8126 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}", 8127 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, 8128 VR128X:$src), 0, "att">; 8129 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", 8130 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst, 8131 VK2WM:$mask, VR128X:$src), 0, "att">; 8132 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}", 8133 (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst, 8134 VK2WM:$mask, VR128X:$src), 0, "att">; 8135 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}", 8136 (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, 8137 i64mem:$src), 0, "att">; 8138 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|" 8139 "$dst {${mask}}, ${src}{1to2}}", 8140 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst, 8141 VK2WM:$mask, i64mem:$src), 0, "att">; 8142 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|" 8143 "$dst {${mask}} {z}, ${src}{1to2}}", 8144 (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst, 8145 VK2WM:$mask, i64mem:$src), 0, "att">; 8146 8147 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}", 8148 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, 8149 VR256X:$src), 0, "att">; 8150 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|" 8151 "$dst {${mask}}, $src}", 8152 (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst, 8153 VK4WM:$mask, VR256X:$src), 0, "att">; 8154 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|" 8155 "$dst {${mask}} {z}, $src}", 8156 (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst, 8157 VK4WM:$mask, VR256X:$src), 0, "att">; 
8158 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}", 8159 (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, 8160 i64mem:$src), 0, "att">; 8161 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|" 8162 "$dst {${mask}}, ${src}{1to4}}", 8163 (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst, 8164 VK4WM:$mask, i64mem:$src), 0, "att">; 8165 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|" 8166 "$dst {${mask}} {z}, ${src}{1to4}}", 8167 (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst, 8168 VK4WM:$mask, i64mem:$src), 0, "att">; 8169} 8170 8171defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", any_sint_to_fp, sint_to_fp, 8172 X86any_VSintToFP, X86VSintToFP, 8173 SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>; 8174 8175defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", any_sint_to_fp, sint_to_fp, 8176 X86VSintToFpRnd, SchedWriteCvtDQ2PS>, 8177 PS, EVEX_CD8<32, CD8VF>; 8178 8179defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86any_cvttp2si, 8180 X86cvttp2si, X86cvttp2siSAE, 8181 SchedWriteCvtPS2DQ>, XS, EVEX_CD8<32, CD8VF>; 8182 8183defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86any_cvttp2si, 8184 X86cvttp2si, X86cvttp2siSAE, 8185 SchedWriteCvtPD2DQ>, 8186 PD, VEX_W, EVEX_CD8<64, CD8VF>; 8187 8188defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86any_cvttp2ui, 8189 X86cvttp2ui, X86cvttp2uiSAE, 8190 SchedWriteCvtPS2DQ>, PS, EVEX_CD8<32, CD8VF>; 8191 8192defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86any_cvttp2ui, 8193 X86cvttp2ui, X86cvttp2uiSAE, 8194 SchedWriteCvtPD2DQ>, 8195 PS, VEX_W, EVEX_CD8<64, CD8VF>; 8196 8197defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", any_uint_to_fp, 8198 uint_to_fp, X86any_VUintToFP, X86VUintToFP, 8199 SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>; 8200 8201defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", any_uint_to_fp, 8202 uint_to_fp, X86VUintToFpRnd, 8203 SchedWriteCvtDQ2PS>, XD, EVEX_CD8<32, CD8VF>; 8204 8205defm VCVTPS2DQ : 
avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int, X86cvtp2Int, 8206 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD, 8207 EVEX_CD8<32, CD8VF>; 8208 8209defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int, X86cvtp2Int, 8210 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, XD, 8211 VEX_W, EVEX_CD8<64, CD8VF>; 8212 8213defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt, X86cvtp2UInt, 8214 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, 8215 PS, EVEX_CD8<32, CD8VF>; 8216 8217defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt, X86cvtp2UInt, 8218 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W, 8219 PS, EVEX_CD8<64, CD8VF>; 8220 8221defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int, X86cvtp2Int, 8222 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, VEX_W, 8223 PD, EVEX_CD8<64, CD8VF>; 8224 8225defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int, X86cvtp2Int, 8226 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD, 8227 EVEX_CD8<32, CD8VH>; 8228 8229defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt, X86cvtp2UInt, 8230 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W, 8231 PD, EVEX_CD8<64, CD8VF>; 8232 8233defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt, X86cvtp2UInt, 8234 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, PD, 8235 EVEX_CD8<32, CD8VH>; 8236 8237defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86any_cvttp2si, 8238 X86cvttp2si, X86cvttp2siSAE, 8239 SchedWriteCvtPD2DQ>, VEX_W, 8240 PD, EVEX_CD8<64, CD8VF>; 8241 8242defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86any_cvttp2si, 8243 X86cvttp2si, X86cvttp2siSAE, 8244 SchedWriteCvtPS2DQ>, PD, 8245 EVEX_CD8<32, CD8VH>; 8246 8247defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86any_cvttp2ui, 8248 X86cvttp2ui, X86cvttp2uiSAE, 8249 SchedWriteCvtPD2DQ>, VEX_W, 8250 PD, EVEX_CD8<64, CD8VF>; 8251 8252defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86any_cvttp2ui, 8253 X86cvttp2ui, X86cvttp2uiSAE, 8254 SchedWriteCvtPS2DQ>, PD, 8255 EVEX_CD8<32, CD8VH>; 
// 64-bit integer -> FP conversions (AVX512DQ). Each instantiation pairs the
// strict-FP-capable "any_" ISD node (used for the plain instruction patterns)
// with the non-strict node plus a dedicated node for the embedded-rounding
// ({er}) forms.
defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", any_sint_to_fp,
                            sint_to_fp, X86VSintToFpRnd,
                            SchedWriteCvtDQ2PD>, VEX_W, XS, EVEX_CD8<64, CD8VF>;

defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", any_uint_to_fp,
                            uint_to_fp, X86VUintToFpRnd, SchedWriteCvtDQ2PD>,
                            VEX_W, XS, EVEX_CD8<64, CD8VF>;

defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", any_sint_to_fp,
                            sint_to_fp, X86VSintToFpRnd, SchedWriteCvtDQ2PS>,
                            VEX_W, PS, EVEX_CD8<64, CD8VF>;

defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", any_uint_to_fp,
                            uint_to_fp, X86VUintToFpRnd, SchedWriteCvtDQ2PS>,
                            VEX_W, XD, EVEX_CD8<64, CD8VF>;

let Predicates = [HasVLX] in {
  // Special patterns to allow use of X86mcvtp2Int for masking. Instruction
  // patterns have been disabled with null_frag.
  // vcvtpd2dq, 128-bit: reg-reg forms (unmasked / merge-masked / zero-masked).
  def : Pat<(v4i32 (X86cvtp2Int (v2f64 VR128X:$src))),
            (VCVTPD2DQZ128rr VR128X:$src)>;
  def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;

  // Load-folded forms.
  def : Pat<(v4i32 (X86cvtp2Int (loadv2f64 addr:$src))),
            (VCVTPD2DQZ128rm addr:$src)>;
  def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;

  // Broadcast-folded forms.
  def : Pat<(v4i32 (X86cvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)))),
            (VCVTPD2DQZ128rmb addr:$src)>;
  def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
                          (v4i32 VR128X:$src0), VK2WM:$mask),
            (VCVTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;

  // Special patterns to allow use of X86mcvttp2si for masking. Instruction
  // patterns have been disabled with null_frag.
  // vcvttpd2dq, 128-bit (truncating signed conversion).
  def : Pat<(v4i32 (X86any_cvttp2si (v2f64 VR128X:$src))),
            (VCVTTPD2DQZ128rr VR128X:$src)>;
  def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;

  def : Pat<(v4i32 (X86any_cvttp2si (loadv2f64 addr:$src))),
            (VCVTTPD2DQZ128rm addr:$src)>;
  def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v4i32 (X86any_cvttp2si (v2f64 (X86VBroadcastld64 addr:$src)))),
            (VCVTTPD2DQZ128rmb addr:$src)>;
  def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
                          (v4i32 VR128X:$src0), VK2WM:$mask),
            (VCVTTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;

  // Special patterns to allow use of X86mcvtp2UInt for masking. Instruction
  // patterns have been disabled with null_frag.
  // vcvtpd2udq, 128-bit (unsigned conversion).
  def : Pat<(v4i32 (X86cvtp2UInt (v2f64 VR128X:$src))),
            (VCVTPD2UDQZ128rr VR128X:$src)>;
  def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;

  def : Pat<(v4i32 (X86cvtp2UInt (loadv2f64 addr:$src))),
            (VCVTPD2UDQZ128rm addr:$src)>;
  def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v4i32 (X86cvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)))),
            (VCVTPD2UDQZ128rmb addr:$src)>;
  def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
                           (v4i32 VR128X:$src0), VK2WM:$mask),
            (VCVTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
                           v4i32x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;

  // Special patterns to allow use of X86mcvttp2ui for masking. Instruction
  // patterns have been disabled with null_frag.
  // vcvttpd2udq, 128-bit (truncating unsigned conversion); masked forms
  // select through X86mcvttp2ui since the instruction patterns use null_frag.
  def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 VR128X:$src))),
            (VCVTTPD2UDQZ128rr VR128X:$src)>;
  def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;

  def : Pat<(v4i32 (X86any_cvttp2ui (loadv2f64 addr:$src))),
            (VCVTTPD2UDQZ128rm addr:$src)>;
  def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)))),
            (VCVTTPD2UDQZ128rmb addr:$src)>;
  def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
                          (v4i32 VR128X:$src0), VK2WM:$mask),
            (VCVTTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
}

let Predicates = [HasDQI, HasVLX] in {
  // The 128-bit cvt*ps2*qq forms only read 64 bits of the f32 source, so a
  // zero-extending 64-bit vector load can be folded into the rm forms.
  def : Pat<(v2i64 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
            (VCVTPS2QQZ128rm addr:$src)>;
  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
                    (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                    VR128X:$src0)),
            (VCVTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
                    (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                    v2i64x_info.ImmAllZerosV)),
            (VCVTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v2i64 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
            (VCVTPS2UQQZ128rm addr:$src)>;
  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
                    (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                    VR128X:$src0)),
            (VCVTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
                    (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                    v2i64x_info.ImmAllZerosV)),
            (VCVTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v2i64 (X86any_cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
            (VCVTTPS2QQZ128rm addr:$src)>;
  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
                    (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                    VR128X:$src0)),
            (VCVTTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
                    (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                    v2i64x_info.ImmAllZerosV)),
            (VCVTTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v2i64 (X86any_cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
            (VCVTTPS2UQQZ128rm addr:$src)>;
  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
                    (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                    VR128X:$src0)),
            (VCVTTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
                    (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                    v2i64x_info.ImmAllZerosV)),
            (VCVTTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
}

let Predicates = [HasVLX] in {
  // Likewise, 128-bit cvt*dq2pd reads only 64 bits of the i32 source.
  def : Pat<(v2f64 (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
            (VCVTDQ2PDZ128rm addr:$src)>;
  def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
                    (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
                    VR128X:$src0)),
            (VCVTDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
                    (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
                    v2f64x_info.ImmAllZerosV)),
            (VCVTDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v2f64 (X86any_VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
            (VCVTUDQ2PDZ128rm addr:$src)>;
  def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
                    (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
                    VR128X:$src0)),
            (VCVTUDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
                    (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
                    v2f64x_info.ImmAllZerosV)),
            (VCVTUDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
}

let Predicates = [HasDQI, HasVLX] in {
  // Special patterns to allow use of X86VMSintToFP for masking. Instruction
  // patterns have been disabled with null_frag.
  def : Pat<(v4f32 (X86any_VSintToFP (v2i64 VR128X:$src))),
            (VCVTQQ2PSZ128rr VR128X:$src)>;
  def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), (v4f32 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTQQ2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), v4f32x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTQQ2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;

  def : Pat<(v4f32 (X86any_VSintToFP (loadv2i64 addr:$src))),
            (VCVTQQ2PSZ128rm addr:$src)>;
  def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), (v4f32 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTQQ2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), v4f32x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTQQ2PSZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v4f32 (X86any_VSintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
            (VCVTQQ2PSZ128rmb addr:$src)>;
  def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
                           (v4f32 VR128X:$src0), VK2WM:$mask),
            (VCVTQQ2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
                           v4f32x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTQQ2PSZ128rmbkz VK2WM:$mask, addr:$src)>;

  // Special patterns to allow use of X86VMUintToFP for masking. Instruction
  // patterns have been disabled with null_frag.
  def : Pat<(v4f32 (X86any_VUintToFP (v2i64 VR128X:$src))),
            (VCVTUQQ2PSZ128rr VR128X:$src)>;
  def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), (v4f32 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTUQQ2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), v4f32x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTUQQ2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;

  def : Pat<(v4f32 (X86any_VUintToFP (loadv2i64 addr:$src))),
            (VCVTUQQ2PSZ128rm addr:$src)>;
  def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), (v4f32 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTUQQ2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), v4f32x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTUQQ2PSZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v4f32 (X86any_VUintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
            (VCVTUQQ2PSZ128rmb addr:$src)>;
  def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
                           (v4f32 VR128X:$src0), VK2WM:$mask),
            (VCVTUQQ2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
                           v4f32x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTUQQ2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
}

//===----------------------------------------------------------------------===//
// Half precision conversion instructions
//===----------------------------------------------------------------------===//

// vcvtph2ps: half -> single conversion. The split-maskable form uses the
// strict-capable X86any_cvtph2ps for the unmasked pattern and X86cvtph2ps
// for the masked ones.
let Uses = [MXCSR], mayRaiseFPException = 1 in
multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                           X86MemOperand x86memop, dag ld_dag,
                           X86FoldableSchedWrite sched> {
  defm rr : AVX512_maskable_split<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst),
                            (ins _src.RC:$src), "vcvtph2ps", "$src", "$src",
                            (X86any_cvtph2ps (_src.VT _src.RC:$src)),
                            (X86cvtph2ps (_src.VT _src.RC:$src))>,
                            T8PD, Sched<[sched]>;
  defm rm : AVX512_maskable_split<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
                            (ins x86memop:$src), "vcvtph2ps", "$src", "$src",
                            (X86any_cvtph2ps (_src.VT ld_dag)),
                            (X86cvtph2ps (_src.VT ld_dag))>,
                            T8PD, Sched<[sched.Folded]>;
}

// {sae}-suppressed-exception register form of vcvtph2ps (ZMM only).
multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                               X86FoldableSchedWrite sched> {
  let Uses = [MXCSR] in
  defm rrb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
                             (ins _src.RC:$src), "vcvtph2ps",
                             "{sae}, $src", "$src, {sae}",
                             (X86cvtph2psSAE (_src.VT _src.RC:$src))>,
                             T8PD, EVEX_B, Sched<[sched]>;
}

let Predicates = [HasAVX512] in
  defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem,
                                    (load addr:$src), WriteCvtPH2PSZ>,
                    avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSZ>,
                    EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;

let Predicates = [HasVLX] in {
  defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
                       (load addr:$src), WriteCvtPH2PSY>, EVEX, EVEX_V256,
                       EVEX_CD8<32, CD8VH>;
  // The 128-bit form only consumes 64 bits of source, hence the vzload64.
  defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
                       (bitconvert (v2i64 (X86vzload64 addr:$src))),
                       WriteCvtPH2PS>, EVEX, EVEX_V128,
                       EVEX_CD8<32, CD8VH>;

  // Pattern match vcvtph2ps of a scalar i64 load.
  def : Pat<(v4f32 (X86any_cvtph2ps (v8i16 (bitconvert
              (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
            (VCVTPH2PSZ128rm addr:$src)>;
}

// vcvtps2ph: single -> half conversion with an immediate rounding-control
// operand. Store (mr/mrk) forms carry no patterns; folding is handled by the
// explicit Pats below the instantiations.
multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                           X86MemOperand x86memop, SchedWrite RR, SchedWrite MR> {
let ExeDomain = GenericDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  def rr : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
             (ins _src.RC:$src1, i32u8imm:$src2),
             "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
             [(set _dest.RC:$dst,
               (X86any_cvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>,
             Sched<[RR]>;
  // Merge-masked form ties the pass-through operand to the destination.
  let Constraints = "$src0 = $dst" in
  def rrk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
             (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
             "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
             [(set _dest.RC:$dst,
               (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
                             _dest.RC:$src0, _src.KRCWM:$mask))]>,
             Sched<[RR]>, EVEX_K;
  def rrkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
             (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
             "vcvtps2ph\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}",
             [(set _dest.RC:$dst,
               (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
                             _dest.ImmAllZerosV, _src.KRCWM:$mask))]>,
             Sched<[RR]>, EVEX_KZ;
  let hasSideEffects = 0, mayStore = 1 in {
    def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
               (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
               "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
               Sched<[MR]>;
    def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
               (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
               "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", []>,
               EVEX_K, Sched<[MR]>, NotMemoryFoldable;
  }
}
}

// {sae} register form; assembly-only (empty pattern list).
multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                               SchedWrite Sched> {
  let hasSideEffects = 0, Uses = [MXCSR] in
  defm rrb : AVX512_maskable_in_asm<0x1D, MRMDestReg, _dest,
                   (outs _dest.RC:$dst),
                   (ins _src.RC:$src1, i32u8imm:$src2),
                   "vcvtps2ph", "$src2, {sae}, $src1", "$src1, {sae}, $src2", []>,
                   EVEX_B, AVX512AIi8Base, Sched<[Sched]>;
}

let Predicates = [HasAVX512] in {
  defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem,
                                    WriteCvtPS2PHZ, WriteCvtPS2PHZSt>,
                    avx512_cvtps2ph_sae<v16i16x_info, v16f32_info, WriteCvtPS2PHZ>,
                    EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;

  def : Pat<(store (v16i16 (X86any_cvtps2ph VR512:$src1, timm:$src2)), addr:$dst),
            (VCVTPS2PHZmr addr:$dst, VR512:$src1, timm:$src2)>;
}

let Predicates = [HasVLX] in {
  defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem,
                                       WriteCvtPS2PHY, WriteCvtPS2PHYSt>,
                                       EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
  defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem,
                                       WriteCvtPS2PH, WriteCvtPS2PHSt>,
                                       EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;

  // The 128-bit result is 64 bits wide; match extracting and storing the low
  // scalar (as either f64 or i64) of the converted vector.
  def : Pat<(store (f64 (extractelt
                         (bc_v2f64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))),
                         (iPTR 0))), addr:$dst),
            (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
  def : Pat<(store (i64 (extractelt
                         (bc_v2i64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))),
                         (iPTR 0))), addr:$dst),
            (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
  def : Pat<(store (v8i16 (X86any_cvtps2ph VR256X:$src1, timm:$src2)), addr:$dst),
            (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, timm:$src2)>;
}

// Unordered/Ordered scalar fp compare with Sae and set EFLAGS
// NOTE(review): the 'd' (Domain) parameter is not referenced in the body;
// presumably kept so instantiations mirror the non-SAE variants — confirm.
multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
                              string OpcodeStr, Domain d,
                              X86FoldableSchedWrite sched = WriteFComX> {
  let hasSideEffects = 0, Uses = [MXCSR] in
  def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
                  !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"), []>,
                  EVEX, EVEX_B, VEX_LIG, EVEX_V128, Sched<[sched]>;
}

let Defs = [EFLAGS], Predicates = [HasAVX512] in {
  defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", SSEPackedSingle>,
                                   AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
  defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", SSEPackedDouble>,
                                    AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
  defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", SSEPackedSingle>,
                                   AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
  defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", SSEPackedDouble>,
                                    AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
}

// EVEX-encoded (v)ucomiss/(v)comiss and friends, reusing the SSE multiclasses.
let Defs = [EFLAGS], Predicates = [HasAVX512] in {
  defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86any_fcmp, f32, f32mem, loadf32,
                                 "ucomiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
                                 EVEX_CD8<32, CD8VT1>;
  defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86any_fcmp, f64, f64mem, loadf64,
                                 "ucomisd", SSEPackedDouble>, PD, EVEX,
                                 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
  defm VCOMISSZ : sse12_ord_cmp<0x2F, FR32X, X86strict_fcmps, f32, f32mem, loadf32,
                                "comiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
                                EVEX_CD8<32, CD8VT1>;
  defm VCOMISDZ : sse12_ord_cmp<0x2F, FR64X, X86strict_fcmps, f64, f64mem, loadf64,
                                "comisd", SSEPackedDouble>, PD, EVEX,
                                VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
  // Intrinsic (vector-operand) forms, selection-only.
  let isCodeGenOnly = 1 in {
    defm VUCOMISSZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
                      sse_load_f32, "ucomiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
                      EVEX_CD8<32, CD8VT1>;
    defm VUCOMISDZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
                      sse_load_f64, "ucomisd", SSEPackedDouble>, PD, EVEX,
                      VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;

    defm VCOMISSZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
                      sse_load_f32, "comiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
                      EVEX_CD8<32, CD8VT1>;
    defm VCOMISDZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
                      sse_load_f64, "comisd", SSEPackedDouble>, PD, EVEX,
                      VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
  }
}

/// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd
multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let Predicates = [HasAVX512], ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
  defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
                           EVEX_4V, VEX_LIG, Sched<[sched]>;
  defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (OpNode (_.VT _.RC:$src1),
                          (_.ScalarIntMemFrags addr:$src2))>, EVEX_4V, VEX_LIG,
                         Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}

defm VRCP14SSZ : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SchedWriteFRcp.Scl,
                               f32x_info>, EVEX_CD8<32, CD8VT1>,
                               T8PD;
defm VRCP14SDZ : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SchedWriteFRcp.Scl,
                               f64x_info>, VEX_W, EVEX_CD8<64, CD8VT1>,
                               T8PD;
defm VRSQRT14SSZ : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s,
                                 SchedWriteFRsqrt.Scl, f32x_info>,
                                 EVEX_CD8<32, CD8VT1>, T8PD;
defm VRSQRT14SDZ : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s,
                                 SchedWriteFRsqrt.Scl, f64x_info>, VEX_W,
                                 EVEX_CD8<64, CD8VT1>, T8PD;

/// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                          (ins _.RC:$src), OpcodeStr, "$src", "$src",
                          (_.VT (OpNode _.RC:$src))>, EVEX, T8PD,
                          Sched<[sched]>;
  defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                          (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
                          (OpNode (_.VT
                            (bitconvert (_.LdFrag addr:$src))))>, EVEX, T8PD,
                          Sched<[sched.Folded, sched.ReadAfterFold]>;
  // Embedded-broadcast memory form.
  defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                           (ins _.ScalarMemOp:$src), OpcodeStr,
                           "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
                           (OpNode (_.VT
                             (_.BroadcastLdFrag addr:$src)))>,
                           EVEX, T8PD, EVEX_B,
                           Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Instantiates ps/pd forms at 512 bits unconditionally and 128/256 bits
// under AVX512VL.
let Uses = [MXCSR] in
multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched> {
  defm PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"), OpNode, sched.ZMM,
                           v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"), OpNode, sched.ZMM,
                           v8f64_info>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
                                OpNode, sched.XMM, v4f32x_info>,
                                EVEX_V128, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
                                OpNode, sched.YMM, v8f32x_info>,
                                EVEX_V256, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
                                OpNode, sched.XMM, v2f64x_info>,
                                EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
                                OpNode, sched.YMM, v4f64x_info>,
                                EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
  }
}

defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86rsqrt14, SchedWriteFRsqrt>;
defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86rcp14, SchedWriteFRcp>;

/// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                         SDNode OpNode, SDNode OpNodeSAE,
                         X86FoldableSchedWrite sched> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
  defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
                           Sched<[sched]>, SIMD_EXC;

  // {sae} form selects via the dedicated SAE node and cannot raise FP
  // exceptions (no SIMD_EXC).
  defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                            "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                            (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
                            EVEX_B, Sched<[sched]>;

  defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2))>,
                         Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  }
}

// Instantiates the ss/sd pair of an ERI-style scalar op.
multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
                        SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
  defm SSZ : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, OpNodeSAE,
                           sched>, EVEX_CD8<32, CD8VT1>, VEX_LIG;
  defm SDZ : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, OpNodeSAE,
                           sched>, EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
}

let Predicates = [HasERI] in {
  defm VRCP28 : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, X86rcp28SAEs,
                             SchedWriteFRcp.Scl>, T8PD, EVEX_4V;
  defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, X86rsqrt28SAEs,
                               SchedWriteFRsqrt.Scl>, T8PD, EVEX_4V;
}

// vgetexpss/sd shares the ERI scalar shape but is baseline AVX-512.
defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
                            SchedWriteFRnd.Scl>, T8PD, EVEX_4V;
/// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd

multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         SDNode OpNode, X86FoldableSchedWrite sched> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src), OpcodeStr, "$src", "$src",
                         (OpNode (_.VT _.RC:$src))>,
                         Sched<[sched]>;

  defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
                         (OpNode (_.VT
                             (bitconvert (_.LdFrag addr:$src))))>,
                         Sched<[sched.Folded, sched.ReadAfterFold]>;

  // Embedded-broadcast memory form.
  defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.ScalarMemOp:$src), OpcodeStr,
                         "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
                         (OpNode (_.VT
                                  (_.BroadcastLdFrag addr:$src)))>,
                         EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
// {sae} register form of the packed op.
multiclass avx512_fp28_p_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                             SDNode OpNode, X86FoldableSchedWrite sched> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
  defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src), OpcodeStr,
                         "{sae}, $src", "$src, {sae}",
                         (OpNode (_.VT _.RC:$src))>,
                         EVEX_B, Sched<[sched]>;
}

// 512-bit ps/pd forms (plain + SAE) of an ERI packed op.
multiclass avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
                      SDNode OpNodeSAE, X86SchedWriteWidths sched> {
  defm PSZ : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>,
             avx512_fp28_p_sae<opc, OpcodeStr#"ps", v16f32_info, OpNodeSAE, sched.ZMM>,
             T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>,
             avx512_fp28_p_sae<opc, OpcodeStr#"pd", v8f64_info, OpNodeSAE, sched.ZMM>,
             T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
}

// 128/256-bit forms, used by ops (like vgetexp) that also exist under VL.
multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode, X86SchedWriteWidths sched> {
  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode,
                                sched.XMM>,
                                EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode,
                                sched.YMM>,
                                EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode,
                                sched.XMM>,
                                EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode,
                                sched.YMM>,
                                EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
  }
}

let Predicates = [HasERI] in {
  defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, X86rsqrt28SAE,
                             SchedWriteFRsqrt>, EVEX;
  defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28, X86rcp28SAE,
                           SchedWriteFRcp>, EVEX;
  defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2, X86exp2SAE,
                          SchedWriteFAdd>, EVEX;
}
defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
                          SchedWriteFRnd>,
               avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexp,
                                        SchedWriteFRnd>, EVEX;

// Packed sqrt with explicit static-rounding-control operand.
multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
                                    X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  let ExeDomain = _.ExeDomain in
  defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc",
                         (_.VT (X86fsqrtRnd _.RC:$src, (i32 timm:$rc)))>,
                         EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
}

// Packed sqrt; split-maskable pairs strict-capable any_fsqrt (unmasked)
// with fsqrt (masked).
multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
                              X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm r: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src), OpcodeStr, "$src", "$src",
                         (_.VT (any_fsqrt _.RC:$src)),
                         (_.VT (fsqrt _.RC:$src))>, EVEX,
                         Sched<[sched]>;
  defm m: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
                         (any_fsqrt (_.VT (_.LdFrag addr:$src))),
                         (fsqrt (_.VT (_.LdFrag addr:$src)))>, EVEX,
                         Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm mb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
                          (ins _.ScalarMemOp:$src), OpcodeStr,
                          "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
                          (any_fsqrt (_.VT (_.BroadcastLdFrag addr:$src))),
                          (fsqrt (_.VT (_.BroadcastLdFrag addr:$src)))>,
                          EVEX, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// ps/pd sqrt at 512 bits always, 128/256 bits under AVX512VL.
let Uses = [MXCSR], mayRaiseFPException = 1 in
multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
                                  X86SchedWriteSizes sched> {
  defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
                                sched.PS.ZMM, v16f32_info>,
                                EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
                                sched.PD.ZMM, v8f64_info>,
                                EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
                                     sched.PS.XMM, v4f32x_info>,
                                     EVEX_V128, PS, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
                                     sched.PS.YMM, v8f32x_info>,
                                     EVEX_V256, PS, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
                                     sched.PD.XMM, v2f64x_info>,
                                     EVEX_V128, VEX_W, PD, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
                                     sched.PD.YMM, v4f64x_info>,
                                     EVEX_V256, VEX_W, PD, EVEX_CD8<64, CD8VF>;
  }
}

// Rounding-control sqrt forms; ZMM only (embedded rounding implies 512-bit).
let Uses = [MXCSR] in
multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
                                        X86SchedWriteSizes sched> {
  defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"),
                                      sched.PS.ZMM, v16f32_info>,
                                      EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"),
                                      sched.PD.ZMM, v8f64_info>,
                                      EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
}

// Scalar sqrt: intrinsic (_Int) forms with masking/rounding, plus
// codegen-only FR32X/FR64X forms selected by the Pats at the bottom.
multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
                              X86VectorVTInfo _, string Name> {
  let ExeDomain = _.ExeDomain in {
    defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (X86fsqrts (_.VT _.RC:$src1),
                                    (_.VT _.RC:$src2))>,
                         Sched<[sched]>, SIMD_EXC;
    defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (X86fsqrts (_.VT _.RC:$src1),
                                    (_.ScalarIntMemFrags addr:$src2))>,
                         Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
    // Embedded-rounding form.
    let Uses = [MXCSR] in
    defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
                         "$rc, $src2, $src1", "$src1, $src2, $rc",
                         (X86fsqrtRnds (_.VT _.RC:$src1),
                                       (_.VT _.RC:$src2),
                                       (i32 timm:$rc))>,
                         EVEX_B, EVEX_RC, Sched<[sched]>;

    let isCodeGenOnly = 1, hasSideEffects = 0, Predicates=[HasAVX512] in {
      def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
                (ins _.FRC:$src1, _.FRC:$src2),
                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
                Sched<[sched]>, SIMD_EXC;
      let mayLoad = 1 in
        def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
                  (ins _.FRC:$src1, _.ScalarMemOp:$src2),
                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
                  Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
    }
  }

  // Select the codegen-only forms for plain scalar fsqrt; the unused first
  // operand (the pass-through register) is left undefined.
  let Predicates = [HasAVX512] in {
    def : Pat<(_.EltVT (any_fsqrt _.FRC:$src)),
              (!cast<Instruction>(Name#Zr)
                  (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;
  }

  // Fold the load only when optimizing for size (avoids a partial-register
  // dependency on the merged destination otherwise — TODO confirm rationale).
  let Predicates = [HasAVX512, OptForSize] in {
    def : Pat<(_.EltVT (any_fsqrt (load addr:$src))),
              (!cast<Instruction>(Name#Zm)
                  (_.EltVT (IMPLICIT_DEF)), addr:$src)>;
  }
}

multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr,
                                  X86SchedWriteSizes sched> {
  defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", sched.PS.Scl, f32x_info, NAME#"SS">,
                        EVEX_CD8<32, CD8VT1>, EVEX_4V, XS;
  defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", sched.PD.Scl, f64x_info, NAME#"SD">,
                        EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W;
}

defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", SchedWriteFSqrtSizes>,
             avx512_sqrt_packed_all_round<0x51, "vsqrt", SchedWriteFSqrtSizes>;

defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, VEX_LIG;

// Scalar round-to-integral (vrndscaless/sd) with immediate scale/rc operand;
// structured like avx512_sqrt_scalar above.
multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
                         "$src3, $src2, $src1", "$src1, $src2, $src3",
                         (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                             (i32 timm:$src3)))>,
                         Sched<[sched]>, SIMD_EXC;

    let Uses = [MXCSR] in
    defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
                         "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
                         (_.VT (X86RndScalesSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                                (i32 timm:$src3)))>, EVEX_B,
                         Sched<[sched]>;

    defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
                         OpcodeStr,
                         "$src3, $src2, $src1", "$src1, $src2, $src3",
                         (_.VT (X86RndScales _.RC:$src1,
                                (_.ScalarIntMemFrags addr:$src2), (i32 timm:$src3)))>,
                         Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;

    let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in {
      def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
                (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
                OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                []>, Sched<[sched]>, SIMD_EXC;

      let mayLoad = 1 in
        def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
                  (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
                  OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                  []>, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
    }
  }

  // Select codegen-only forms for scalar round nodes.
  let Predicates = [HasAVX512] in {
    def : Pat<(X86any_VRndScale _.FRC:$src1, timm:$src2),
              (_.EltVT (!cast<Instruction>(NAME#r) (_.EltVT (IMPLICIT_DEF)),
               _.FRC:$src1, timm:$src2))>;
  }

  let Predicates = [HasAVX512, OptForSize] in {
    def : Pat<(X86any_VRndScale (_.ScalarLdFrag addr:$src1), timm:$src2),
              (_.EltVT (!cast<Instruction>(NAME#m) (_.EltVT (IMPLICIT_DEF)),
               addr:$src1, timm:$src2))>;
  }
}

defm VRNDSCALESSZ :
avx512_rndscale_scalar<0x0A, "vrndscaless", 9084 SchedWriteFRnd.Scl, f32x_info>, 9085 AVX512AIi8Base, EVEX_4V, VEX_LIG, 9086 EVEX_CD8<32, CD8VT1>; 9087 9088defm VRNDSCALESDZ : avx512_rndscale_scalar<0x0B, "vrndscalesd", 9089 SchedWriteFRnd.Scl, f64x_info>, 9090 VEX_W, AVX512AIi8Base, EVEX_4V, VEX_LIG, 9091 EVEX_CD8<64, CD8VT1>; 9092 9093multiclass avx512_masked_scalar<SDNode OpNode, string OpcPrefix, SDNode Move, 9094 dag Mask, X86VectorVTInfo _, PatLeaf ZeroFP, 9095 dag OutMask, Predicate BasePredicate> { 9096 let Predicates = [BasePredicate] in { 9097 def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask, 9098 (OpNode (extractelt _.VT:$src2, (iPTR 0))), 9099 (extractelt _.VT:$dst, (iPTR 0))))), 9100 (!cast<Instruction>("V"#OpcPrefix#r_Intk) 9101 _.VT:$dst, OutMask, _.VT:$src2, _.VT:$src1)>; 9102 9103 def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask, 9104 (OpNode (extractelt _.VT:$src2, (iPTR 0))), 9105 ZeroFP))), 9106 (!cast<Instruction>("V"#OpcPrefix#r_Intkz) 9107 OutMask, _.VT:$src2, _.VT:$src1)>; 9108 } 9109} 9110 9111defm : avx512_masked_scalar<fsqrt, "SQRTSSZ", X86Movss, 9112 (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v4f32x_info, 9113 fp32imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>; 9114defm : avx512_masked_scalar<fsqrt, "SQRTSDZ", X86Movsd, 9115 (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v2f64x_info, 9116 fp64imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>; 9117 9118 9119//------------------------------------------------- 9120// Integer truncate and extend operations 9121//------------------------------------------------- 9122 9123// PatFrags that contain a select and a truncate op. The take operands in the 9124// same order as X86vmtrunc, X86vmtruncs, X86vmtruncus. This allows us to pass 9125// either to the multiclasses. 
// Masked-truncate PatFrags: (vselect_mask mask, (trunc src), src0) in the
// operand order expected by the trunc multiclasses below.
def select_trunc : PatFrag<(ops node:$src, node:$src0, node:$mask),
                           (vselect_mask node:$mask,
                                         (trunc node:$src), node:$src0)>;
def select_truncs : PatFrag<(ops node:$src, node:$src0, node:$mask),
                            (vselect_mask node:$mask,
                                          (X86vtruncs node:$src), node:$src0)>;
def select_truncus : PatFrag<(ops node:$src, node:$src0, node:$mask),
                             (vselect_mask node:$mask,
                                           (X86vtruncus node:$src), node:$src0)>;

// Register forms (unmasked/merge/zero-masked) plus pattern-less store forms
// of a VPMOV* down-convert at one vector width.
multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               SDPatternOperator MaskNode,
                               X86FoldableSchedWrite sched, X86VectorVTInfo SrcInfo,
                               X86VectorVTInfo DestInfo, X86MemOperand x86memop> {
  let ExeDomain = DestInfo.ExeDomain in {
  def rr   : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
             (ins SrcInfo.RC:$src),
             OpcodeStr # "\t{$src, $dst|$dst, $src}",
             [(set DestInfo.RC:$dst,
               (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src))))]>,
             EVEX, Sched<[sched]>;
  let Constraints = "$src0 = $dst" in
  def rrk  : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
             (ins DestInfo.RC:$src0, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
             OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
             [(set DestInfo.RC:$dst,
                   (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
                             (DestInfo.VT DestInfo.RC:$src0),
                             SrcInfo.KRCWM:$mask))]>,
             EVEX, EVEX_K, Sched<[sched]>;
  def rrkz : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
             (ins SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
             OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
             [(set DestInfo.RC:$dst,
                   (DestInfo.VT (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
                                 DestInfo.ImmAllZerosV, SrcInfo.KRCWM:$mask)))]>,
             EVEX, EVEX_KZ, Sched<[sched]>;
  }

  // Store forms carry no patterns; avx512_trunc_mr_lowering supplies them.
  let mayStore = 1, hasSideEffects = 0, ExeDomain = DestInfo.ExeDomain in {
    def mr  : AVX512XS8I<opc, MRMDestMem, (outs),
               (ins x86memop:$dst, SrcInfo.RC:$src),
               OpcodeStr # "\t{$src, $dst|$dst, $src}", []>,
               EVEX, Sched<[sched.Folded]>;

    def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
               (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
               OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", []>,
               EVEX, EVEX_K, Sched<[sched.Folded]>, NotMemoryFoldable;
  } //mayStore = 1, hasSideEffects = 0
}

// Select the truncating-store / masked-truncating-store instructions for the
// pattern-less mr/mrk forms defined above.
multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
                                    X86VectorVTInfo DestInfo,
                                    PatFrag truncFrag, PatFrag mtruncFrag,
                                    string Name> {

  def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst),
            (!cast<Instruction>(Name#SrcInfo.ZSuffix#mr)
                                addr:$dst, SrcInfo.RC:$src)>;

  def : Pat<(mtruncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst,
                        SrcInfo.KRCWM:$mask),
            (!cast<Instruction>(Name#SrcInfo.ZSuffix#mrk)
                                addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
}

// Instantiate a truncate at 128/256 (VLX) and 512-bit widths. Separate
// OpNode/MaskNode per width because narrow results are modeled differently.
multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128,
                        SDNode OpNode256, SDNode OpNode512,
                        SDPatternOperator MaskNode128,
                        SDPatternOperator MaskNode256,
                        SDPatternOperator MaskNode512,
                        X86FoldableSchedWrite sched,
                        AVX512VLVectorVTInfo VTSrcInfo,
                        X86VectorVTInfo DestInfoZ128,
                        X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
                        X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
                        X86MemOperand x86memopZ, PatFrag truncFrag,
                        PatFrag mtruncFrag, Predicate prd = HasAVX512>{

  let Predicates = [HasVLX, prd] in {
    defm Z128: avx512_trunc_common<opc, OpcodeStr, OpNode128, MaskNode128, sched,
                             VTSrcInfo.info128, DestInfoZ128, x86memopZ128>,
               avx512_trunc_mr_lowering<VTSrcInfo.info128, DestInfoZ128,
                                        truncFrag, mtruncFrag, NAME>, EVEX_V128;

    defm Z256: avx512_trunc_common<opc, OpcodeStr, OpNode256, MaskNode256, sched,
                             VTSrcInfo.info256, DestInfoZ256, x86memopZ256>,
               avx512_trunc_mr_lowering<VTSrcInfo.info256, DestInfoZ256,
                                        truncFrag, mtruncFrag, NAME>, EVEX_V256;
  }
  let Predicates = [prd] in
    defm Z:    avx512_trunc_common<opc, OpcodeStr, OpNode512, MaskNode512, sched,
                             VTSrcInfo.info512, DestInfoZ, x86memopZ>,
               avx512_trunc_mr_lowering<VTSrcInfo.info512, DestInfoZ,
                                        truncFrag, mtruncFrag, NAME>, EVEX_V512;
}

// qword -> byte: result is narrower than 128 bits at every width, so the
// "in-vector" node is used everywhere.
multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, InVecNode,
                          InVecMaskNode, InVecMaskNode, InVecMaskNode, sched,
                          avx512vl_i64_info, v16i8x_info, v16i8x_info,
                          v16i8x_info, i16mem, i32mem, i64mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<8, CD8VO>;
}

// qword -> word.
multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
                          InVecMaskNode, InVecMaskNode, MaskNode, sched,
                          avx512vl_i64_info, v8i16x_info, v8i16x_info,
                          v8i16x_info, i32mem, i64mem, i128mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<16, CD8VQ>;
}

// qword -> dword.
multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
                          InVecMaskNode, MaskNode, MaskNode, sched,
                          avx512vl_i64_info, v4i32x_info, v4i32x_info,
                          v8i32x_info, i64mem, i128mem, i256mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<32, CD8VH>;
}

// dword -> byte.
multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
                          InVecMaskNode, InVecMaskNode, MaskNode, sched,
                          avx512vl_i32_info, v16i8x_info, v16i8x_info,
                          v16i8x_info, i32mem, i64mem, i128mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<8, CD8VQ>;
}

// dword -> word.
multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
                          InVecMaskNode, MaskNode, MaskNode, sched,
                          avx512vl_i32_info, v8i16x_info, v8i16x_info,
                          v16i16x_info, i64mem, i128mem, i256mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<16, CD8VH>;
}

// word -> byte; requires BWI.
multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
                          InVecMaskNode, MaskNode, MaskNode, sched,
                          avx512vl_i16_info, v16i8x_info, v16i8x_info,
                          v32i8x_info, i64mem, i128mem, i256mem, StoreNode,
                          MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>;
}

defm VPMOVQB    : avx512_trunc_qb<0x32, "vpmovqb",   trunc, select_trunc,
                                  WriteShuffle256, truncstorevi8,
                                  masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
defm VPMOVSQB   : avx512_trunc_qb<0x22, "vpmovsqb",  X86vtruncs, select_truncs,
                                  WriteShuffle256, truncstore_s_vi8,
                                  masked_truncstore_s_vi8, X86vtruncs,
                                  X86vmtruncs>;
defm VPMOVUSQB  : avx512_trunc_qb<0x12, "vpmovusqb", X86vtruncus,
                                  select_truncus, WriteShuffle256,
                                  truncstore_us_vi8, masked_truncstore_us_vi8,
                                  X86vtruncus, X86vmtruncus>;

defm VPMOVQW    : avx512_trunc_qw<0x34, "vpmovqw",   trunc, select_trunc,
                                  WriteShuffle256, truncstorevi16,
                                  masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
defm VPMOVSQW   : avx512_trunc_qw<0x24, "vpmovsqw",  X86vtruncs, select_truncs,
                                  WriteShuffle256, truncstore_s_vi16,
                                  masked_truncstore_s_vi16, X86vtruncs,
                                  X86vmtruncs>;
defm VPMOVUSQW  : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus,
                                  select_truncus, WriteShuffle256,
                                  truncstore_us_vi16, masked_truncstore_us_vi16,
                                  X86vtruncus, X86vmtruncus>;

defm VPMOVQD    : avx512_trunc_qd<0x35, "vpmovqd",   trunc, select_trunc,
                                  WriteShuffle256, truncstorevi32,
                                  masked_truncstorevi32, X86vtrunc, X86vmtrunc>;
defm VPMOVSQD   : avx512_trunc_qd<0x25, "vpmovsqd",  X86vtruncs, select_truncs,
                                  WriteShuffle256, truncstore_s_vi32,
                                  masked_truncstore_s_vi32, X86vtruncs,
                                  X86vmtruncs>;
defm VPMOVUSQD  : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus,
                                  select_truncus, WriteShuffle256,
                                  truncstore_us_vi32, masked_truncstore_us_vi32,
                                  X86vtruncus, X86vmtruncus>;

defm VPMOVDB    : avx512_trunc_db<0x31, "vpmovdb", trunc, select_trunc,
                                  WriteShuffle256, truncstorevi8,
                                  masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
defm VPMOVSDB   : avx512_trunc_db<0x21, "vpmovsdb",   X86vtruncs, select_truncs,
                                  WriteShuffle256, truncstore_s_vi8,
                                  masked_truncstore_s_vi8, X86vtruncs,
                                  X86vmtruncs>;
defm VPMOVUSDB  : avx512_trunc_db<0x11, "vpmovusdb",  X86vtruncus,
                                  select_truncus, WriteShuffle256,
                                  truncstore_us_vi8, masked_truncstore_us_vi8,
                                  X86vtruncus, X86vmtruncus>;

defm VPMOVDW    : avx512_trunc_dw<0x33, "vpmovdw", trunc, select_trunc,
                                  WriteShuffle256, truncstorevi16,
                                  masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
defm VPMOVSDW   : avx512_trunc_dw<0x23, "vpmovsdw",   X86vtruncs, select_truncs,
                                  WriteShuffle256, truncstore_s_vi16,
                                  masked_truncstore_s_vi16, X86vtruncs,
                                  X86vmtruncs>;
defm VPMOVUSDW  : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus,
                                  select_truncus, WriteShuffle256,
                                  truncstore_us_vi16, masked_truncstore_us_vi16,
                                  X86vtruncus, X86vmtruncus>;

defm VPMOVWB    : avx512_trunc_wb<0x30, "vpmovwb", trunc, select_trunc,
                                  WriteShuffle256, truncstorevi8,
                                  masked_truncstorevi8, X86vtrunc,
                                  X86vmtrunc>;
defm VPMOVSWB   : avx512_trunc_wb<0x20, "vpmovswb",   X86vtruncs, select_truncs,
                                  WriteShuffle256, truncstore_s_vi8,
                                  masked_truncstore_s_vi8, X86vtruncs,
                                  X86vmtruncs>;
defm VPMOVUSWB  : avx512_trunc_wb<0x10, "vpmovuswb",  X86vtruncus,
                                  select_truncus, WriteShuffle256,
                                  truncstore_us_vi8, masked_truncstore_us_vi8,
                                  X86vtruncus, X86vmtruncus>;

// Without VLX, widen the source to 512 bits, truncate there, and extract the
// low 128 bits of the result.
let Predicates = [HasAVX512, NoVLX] in {
def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))),
         (v8i16 (EXTRACT_SUBREG
                 (v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                          VR256X:$src, sub_ymm)))), sub_xmm))>;
def: Pat<(v4i32 (trunc (v4i64 VR256X:$src))),
         (v4i32 (EXTRACT_SUBREG
                 (v8i32 (VPMOVQDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                         VR256X:$src, sub_ymm)))), sub_xmm))>;
}

let Predicates = [HasBWI, NoVLX] in {
def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
         (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF),
                                            VR256X:$src, sub_ymm))), sub_xmm))>;
}

// Without BWI we can't use vXi16/vXi8 vselect so we have to use vmtrunc nodes.
// Map a masked-truncate node (merge or zero masking) onto the rrk/rrkz forms
// of an existing VPMOV* instruction.
multiclass mtrunc_lowering<string InstrName, SDNode OpNode,
                           X86VectorVTInfo DestInfo,
                           X86VectorVTInfo SrcInfo> {
  def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
                                 DestInfo.RC:$src0,
                                 SrcInfo.KRCWM:$mask)),
            (!cast<Instruction>(InstrName#"rrk") DestInfo.RC:$src0,
                                                 SrcInfo.KRCWM:$mask,
                                                 SrcInfo.RC:$src)>;

  def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
                                 DestInfo.ImmAllZerosV,
                                 SrcInfo.KRCWM:$mask)),
            (!cast<Instruction>(InstrName#"rrkz") SrcInfo.KRCWM:$mask,
                                                  SrcInfo.RC:$src)>;
}

let Predicates = [HasVLX] in {
defm : mtrunc_lowering<"VPMOVDWZ256", X86vmtrunc, v8i16x_info, v8i32x_info>;
defm : mtrunc_lowering<"VPMOVSDWZ256", X86vmtruncs, v8i16x_info, v8i32x_info>;
defm : mtrunc_lowering<"VPMOVUSDWZ256", X86vmtruncus, v8i16x_info, v8i32x_info>;
}

let Predicates = [HasAVX512] in {
defm : mtrunc_lowering<"VPMOVDWZ", X86vmtrunc, v16i16x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVSDWZ", X86vmtruncs, v16i16x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVUSDWZ", X86vmtruncus, v16i16x_info, v16i32_info>;

defm : mtrunc_lowering<"VPMOVDBZ", X86vmtrunc, v16i8x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVSDBZ", X86vmtruncs, v16i8x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVUSDBZ", X86vmtruncus, v16i8x_info, v16i32_info>;

defm : mtrunc_lowering<"VPMOVQWZ", X86vmtrunc, v8i16x_info, v8i64_info>;
defm : mtrunc_lowering<"VPMOVSQWZ", X86vmtruncs, v8i16x_info, v8i64_info>;
defm : mtrunc_lowering<"VPMOVUSQWZ", X86vmtruncus, v8i16x_info, v8i64_info>;
}

// One reg + one mem form of a VPMOVSX/ZX extend at a single width.
multiclass WriteShuffle256_common<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
              X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
              X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode>{
  let ExeDomain = DestInfo.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
                    (ins SrcInfo.RC:$src), OpcodeStr ,"$src", "$src",
                    (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src)))>,
                  EVEX, Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
                  (ins x86memop:$src), OpcodeStr ,"$src", "$src",
                  (DestInfo.VT (LdFrag addr:$src))>,
                EVEX, Sched<[sched.Folded]>;
  }
}

// byte -> word extends.
multiclass WriteShuffle256_BW<bits<8> opc, string OpcodeStr,
          SDNode OpNode, SDNode InVecNode, string ExtTy,
          X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasBWI] in {
    defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v8i16x_info,
                    v16i8x_info, i64mem, LdFrag, InVecNode>,
                     EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v16i16x_info,
                    v16i8x_info, i128mem, LdFrag, OpNode>,
                     EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasBWI] in {
    defm Z   : WriteShuffle256_common<opc, OpcodeStr, sched, v32i16_info,
                    v32i8x_info, i256mem, LdFrag, OpNode>,
                     EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
  }
}

// byte -> dword extends.
multiclass WriteShuffle256_BD<bits<8> opc, string OpcodeStr,
          SDNode OpNode, SDNode InVecNode, string ExtTy,
          X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v4i32x_info,
                   v16i8x_info, i32mem, LdFrag, InVecNode>,
                         EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info,
                   v16i8x_info, i64mem, LdFrag, InVecNode>,
                         EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z   : WriteShuffle256_common<opc, OpcodeStr, sched, v16i32_info,
                   v16i8x_info, i128mem, LdFrag, OpNode>,
                         EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
  }
}

// byte -> qword extends.
multiclass WriteShuffle256_BQ<bits<8> opc, string OpcodeStr,
          SDNode OpNode, SDNode InVecNode, string ExtTy,
          X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
                   v16i8x_info, i16mem, LdFrag, InVecNode>,
                         EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
                   v16i8x_info, i32mem, LdFrag, InVecNode>,
                         EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z   : WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
                   v16i8x_info, i64mem, LdFrag, InVecNode>,
                         EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512, VEX_WIG;
  }
}

// word -> dword extends.
multiclass WriteShuffle256_WD<bits<8> opc, string OpcodeStr,
         SDNode OpNode, SDNode InVecNode, string ExtTy,
         X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v4i32x_info,
                   v8i16x_info, i64mem, LdFrag, InVecNode>,
                         EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info,
                   v8i16x_info, i128mem, LdFrag, OpNode>,
                         EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z   : WriteShuffle256_common<opc, OpcodeStr, sched, v16i32_info,
                   v16i16x_info, i256mem, LdFrag, OpNode>,
                         EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
  }
}

// word -> qword extends.
multiclass WriteShuffle256_WQ<bits<8> opc, string OpcodeStr,
         SDNode OpNode, SDNode InVecNode, string ExtTy,
         X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
                   v8i16x_info, i32mem, LdFrag, InVecNode>,
                         EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
                   v8i16x_info, i64mem, LdFrag, InVecNode>,
                         EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z   : WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
                   v8i16x_info, i128mem, LdFrag, OpNode>,
                         EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
  }
}

// dword -> qword extends.
multiclass WriteShuffle256_DQ<bits<8> opc, string OpcodeStr,
         SDNode OpNode, SDNode InVecNode, string ExtTy,
         X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {

  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
                   v4i32x_info, i64mem, LdFrag, InVecNode>,
                         EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128;

    defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
                   v4i32x_info, i128mem, LdFrag, OpNode>,
                         EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256;
  }
  let Predicates = [HasAVX512] in {
    defm Z   : WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
                   v8i32x_info, i256mem, LdFrag, OpNode>,
                         EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512;
  }
}

defm VPMOVZXBW : WriteShuffle256_BW<0x30, "vpmovzxbw", zext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXBD : WriteShuffle256_BD<0x31, "vpmovzxbd", zext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXBQ : WriteShuffle256_BQ<0x32, "vpmovzxbq", zext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXWD : WriteShuffle256_WD<0x33, "vpmovzxwd", zext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXWQ : WriteShuffle256_WQ<0x34, "vpmovzxwq", zext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXDQ : WriteShuffle256_DQ<0x35, "vpmovzxdq", zext, zext_invec, "z", WriteShuffle256>;

defm VPMOVSXBW: WriteShuffle256_BW<0x20, "vpmovsxbw", sext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXBD: WriteShuffle256_BD<0x21, "vpmovsxbd", sext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXBQ: WriteShuffle256_BQ<0x22, "vpmovsxbq", sext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXWD: WriteShuffle256_WD<0x23, "vpmovsxwd", sext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXWQ: WriteShuffle256_WQ<0x24, "vpmovsxwq", sext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXDQ: WriteShuffle256_DQ<0x25, "vpmovsxdq", sext, sext_invec, "s", WriteShuffle256>;


// Patterns that we also need any extend versions of. aext_vector_inreg
// is currently legalized to zext_vector_inreg.
multiclass AVX512_pmovx_patterns_base<string OpcPrefix, SDNode ExtOp> {
  // 256-bit patterns
  let Predicates = [HasVLX, HasBWI] in {
  def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))),
            (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
  }

  let Predicates = [HasVLX] in {
  def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))),
            (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;

  def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
            (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
  }

  // 512-bit patterns
  let Predicates = [HasBWI] in {
  def : Pat<(v32i16 (ExtOp (loadv32i8 addr:$src))),
            (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
  }
  let Predicates = [HasAVX512] in {
  def : Pat<(v16i32 (ExtOp (loadv16i8 addr:$src))),
            (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;
  def : Pat<(v16i32 (ExtOp (loadv16i16 addr:$src))),
            (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;

  def : Pat<(v8i64 (ExtOp (loadv8i16 addr:$src))),
            (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;

  def : Pat<(v8i64 (ExtOp (loadv8i32 addr:$src))),
            (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
  }
}

multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
                                 SDNode InVecOp> :
    AVX512_pmovx_patterns_base<OpcPrefix, ExtOp> {
  // 128-bit patterns
  let Predicates = [HasVLX, HasBWI] in {
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  }
  let Predicates = [HasVLX] in {
  def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;

  def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;

  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;

  def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (X86vzload32 addr:$src))))),
            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;

  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  }
  let Predicates = [HasVLX] in {
  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;

  def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;

  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  }
  // 512-bit patterns
  let Predicates = [HasAVX512] in {
  def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
  def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
  def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
  }
}

defm : AVX512_pmovx_patterns<"VPMOVSX", sext, sext_invec>;
defm : AVX512_pmovx_patterns<"VPMOVZX", zext, zext_invec>;

// Without BWI we can't do a trunc from v16i16 to v16i8. DAG combine can merge
// ext+trunc aggressively making it impossible to legalize the DAG to this
// pattern directly.
let Predicates = [HasAVX512, NoBWI] in {
def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
         (VPMOVDBZrr (v16i32 (VPMOVZXWDZrr VR256X:$src)))>;
def: Pat<(v16i8 (trunc (loadv16i16 addr:$src))),
         (VPMOVDBZrr (v16i32 (VPMOVZXWDZrm addr:$src)))>;
}

//===----------------------------------------------------------------------===//
// GATHER - SCATTER Operations

// FIXME: Improve scheduling of gather/scatter instructions.
multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> {
  let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
      ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
  def rm  : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, MaskRC:$mask_wb),
            (ins _.RC:$src1, MaskRC:$mask, memop:$src2),
            !strconcat(OpcodeStr#_.Suffix,
            "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
            []>, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteLoad]>;
}

// Gathers with qword-sized elements (f64/i64 data).
multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
                              AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME#D#SUFF#Z: avx512_gather<dopc, OpcodeStr#"d", _.info512,
                                      vy512xmem>, EVEX_V512, VEX_W;
  defm NAME#Q#SUFF#Z: avx512_gather<qopc, OpcodeStr#"q", _.info512,
                                      vz512mem>, EVEX_V512, VEX_W;
let Predicates = [HasVLX] in {
  defm NAME#D#SUFF#Z256: avx512_gather<dopc, OpcodeStr#"d", _.info256,
                              vx256xmem>, EVEX_V256, VEX_W;
  defm NAME#Q#SUFF#Z256: avx512_gather<qopc, OpcodeStr#"q", _.info256,
                              vy256xmem>, EVEX_V256, VEX_W;
  defm NAME#D#SUFF#Z128: avx512_gather<dopc, OpcodeStr#"d", _.info128,
                              vx128xmem>, EVEX_V128, VEX_W;
  defm NAME#Q#SUFF#Z128: avx512_gather<qopc, OpcodeStr#"q", _.info128,
                              vx128xmem>, EVEX_V128, VEX_W;
}
}
// Gathers of 32-bit elements (d/ps). Note the qword-indexed forms gather one
// size class fewer elements than the index vector provides (e.g. Z uses
// _.info256 with a 512-bit index), and the 128-bit qword form only has two
// active lanes, hence the explicit VK2WM mask class.
multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
                              AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME#D#SUFF#Z: avx512_gather<dopc, OpcodeStr#"d", _.info512, vz512mem>,
                                    EVEX_V512;
  defm NAME#Q#SUFF#Z: avx512_gather<qopc, OpcodeStr#"q", _.info256, vz256mem>,
                                    EVEX_V512;
let Predicates = [HasVLX] in {
  defm NAME#D#SUFF#Z256: avx512_gather<dopc, OpcodeStr#"d", _.info256,
                                       vy256xmem>, EVEX_V256;
  defm NAME#Q#SUFF#Z256: avx512_gather<qopc, OpcodeStr#"q", _.info128,
                                       vy128xmem>, EVEX_V256;
  defm NAME#D#SUFF#Z128: avx512_gather<dopc, OpcodeStr#"d", _.info128,
                                       vx128xmem>, EVEX_V128;
  defm NAME#Q#SUFF#Z128: avx512_gather<qopc, OpcodeStr#"q", _.info128,
                                       vx64xmem, VK2WM>, EVEX_V128;
}
}


defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">,
               avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">;

defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">,
                avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;

// Base scatter form: mask_wb = scatter(mem, mask, src). Unlike gather there is
// no destination register tie; only the writemask is consumed and written back.
multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                          X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> {

let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain,
    hasSideEffects = 0 in

  def mr  : AVX5128I<opc, MRMDestMem, (outs MaskRC:$mask_wb),
            (ins memop:$dst, MaskRC:$mask, _.RC:$src),
            !strconcat(OpcodeStr#_.Suffix,
            "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
            []>, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
            Sched<[WriteStore]>;
}

// Scatters of 64-bit elements (q/pd); mirrors avx512_gather_q_pd above.
multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
                               AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME#D#SUFF#Z: avx512_scatter<dopc, OpcodeStr#"d", _.info512,
                                     vy512xmem>, EVEX_V512, VEX_W;
  defm NAME#Q#SUFF#Z: avx512_scatter<qopc, OpcodeStr#"q", _.info512,
                                     vz512mem>, EVEX_V512, VEX_W;
let Predicates = [HasVLX] in {
  defm NAME#D#SUFF#Z256: avx512_scatter<dopc, OpcodeStr#"d", _.info256,
                                        vx256xmem>, EVEX_V256, VEX_W;
  defm NAME#Q#SUFF#Z256: avx512_scatter<qopc, OpcodeStr#"q", _.info256,
                                        vy256xmem>, EVEX_V256, VEX_W;
  defm NAME#D#SUFF#Z128: avx512_scatter<dopc, OpcodeStr#"d", _.info128,
                                        vx128xmem>, EVEX_V128, VEX_W;
  defm NAME#Q#SUFF#Z128: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
                                        vx128xmem>, EVEX_V128, VEX_W;
}
}

// Scatters of 32-bit elements (d/ps); mirrors avx512_gather_d_ps above.
multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
                               AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME#D#SUFF#Z: avx512_scatter<dopc, OpcodeStr#"d", _.info512, vz512mem>,
                                     EVEX_V512;
  defm NAME#Q#SUFF#Z: avx512_scatter<qopc, OpcodeStr#"q", _.info256, vz256mem>,
                                     EVEX_V512;
let Predicates = [HasVLX] in {
  defm NAME#D#SUFF#Z256: avx512_scatter<dopc, OpcodeStr#"d", _.info256,
                                        vy256xmem>, EVEX_V256;
  defm NAME#Q#SUFF#Z256: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
                                        vy128xmem>, EVEX_V256;
  defm NAME#D#SUFF#Z128: avx512_scatter<dopc, OpcodeStr#"d", _.info128,
                                        vx128xmem>, EVEX_V128;
  defm NAME#Q#SUFF#Z128: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
                                        vx64xmem, VK2WM>, EVEX_V128;
}
}

defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">,
                avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">;

defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">,
                 avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">;

// prefetch
// Gather/scatter prefetch hints (PFI). Marked both mayLoad and mayStore since
// the same form covers gather- and scatter-style prefetches; no outputs.
multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
                                          RegisterClass KRC, X86MemOperand memop> {
  let Predicates = [HasPFI], mayLoad = 1, mayStore = 1 in
  def m  : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
               !strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"), []>,
               EVEX, EVEX_K, Sched<[WriteLoad]>;
}

defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
                    VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
                    VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
                    VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
                    VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
                    VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
                    VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
                    VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
                    VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
                     VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
                     VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
                     VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
                     VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

// Mask-to-vector conversion (vpmovm2*): sign-extends each mask bit into a
// full vector element.
multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr > {
def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
                  !strconcat(OpcodeStr#Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
                  [(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))]>,
                  EVEX, Sched<[WriteMove]>; // TODO - WriteVecTrunc?
}

multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
                                 string OpcodeStr, Predicate prd> {
let Predicates = [prd] in
  defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr>, EVEX_V256;
    defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr>, EVEX_V128;
  }
}

defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2" , HasBWI>;
defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI> , VEX_W;
defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>;
defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI> , VEX_W;

// Vector-to-mask conversion (vpmov*2m): the pattern matches a signed
// greater-than compare of zero against the source, i.e. the sign bit of each
// element becomes the corresponding mask bit.
multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr > {
    def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
                        !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                        [(set _.KRC:$dst, (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src)))]>,
                        EVEX, Sched<[WriteMove]>;
}

// Use 512bit version to implement 128/256 bit in case NoVLX.
// The narrow source is widened into a 512-bit register via INSERT_SUBREG and
// the resulting mask is copied to the narrow mask class.
multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo,
                                           X86VectorVTInfo _,
                                           string Name> {

  def : Pat<(_.KVT (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src))),
            (_.KVT (COPY_TO_REGCLASS
                     (!cast<Instruction>(Name#"Zrr")
                       (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                                      _.RC:$src, _.SubRegIdx)),
                   _.KRC))>;
}

multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
                                         AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
    defm Z : convert_vector_to_mask_common <opc, VTInfo.info512, OpcodeStr>,
                                            EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256, OpcodeStr>,
                                              EVEX_V256;
    defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,
                                              EVEX_V128;
  }
  // Without VLX, lower the 128/256-bit forms through the 512-bit instruction.
  let Predicates = [prd, NoVLX] in {
    defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info256, NAME>;
    defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info128, NAME>;
  }
}

defm VPMOVB2M : avx512_convert_vector_to_mask<0x29, "vpmovb2m",
                                              avx512vl_i8_info, HasBWI>;
defm VPMOVW2M : avx512_convert_vector_to_mask<0x29, "vpmovw2m",
                                              avx512vl_i16_info, HasBWI>, VEX_W;
defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m",
                                              avx512vl_i32_info, HasDQI>;
defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
                                              avx512vl_i64_info, HasDQI>, VEX_W;

// Patterns for handling sext from a mask register to v16i8/v16i16 when DQI
// is available, but BWI is not. We can't handle this in lowering because
// a target independent DAG combine likes to combine sext and trunc.
let Predicates = [HasDQI, NoBWI] in {
  def : Pat<(v16i8 (sext (v16i1 VK16:$src))),
            (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
  def : Pat<(v16i16 (sext (v16i1 VK16:$src))),
            (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
}

let Predicates = [HasDQI, NoBWI, HasVLX] in {
  def : Pat<(v8i16 (sext (v8i1 VK8:$src))),
            (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - COMPRESS and EXPAND
//

// Register and store forms of a compress instruction. The register form is
// maskable; the plain and masked store forms carry no patterns (selection is
// done via the compress_by_vec_width_lowering patterns below).
multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
                                        string OpcodeStr, X86FoldableSchedWrite sched> {
  defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
              (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
              (null_frag)>, AVX5128IBase,
              Sched<[sched]>;

  let mayStore = 1, hasSideEffects = 0 in
  def mr : AVX5128I<opc, MRMDestMem, (outs),
              (ins _.MemOp:$dst, _.RC:$src),
              OpcodeStr # "\t{$src, $dst|$dst, $src}",
              []>, EVEX_CD8<_.EltSize, CD8VT1>,
              Sched<[sched.Folded]>;

  def mrk : AVX5128I<opc, MRMDestMem, (outs),
              (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
              OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
              []>,
              EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
              Sched<[sched.Folded]>;
}

// Selection patterns: masked compressing store, and masked/zero-masked
// register compress.
multiclass compress_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
  def : Pat<(X86mCompressingStore (_.VT _.RC:$src), addr:$dst, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#mrk)
                            addr:$dst, _.KRCWM:$mask, _.RC:$src)>;

  def : Pat<(X86compress (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#rrk)
                            _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
  def : Pat<(X86compress (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#rrkz)
                            _.KRCWM:$mask, _.RC:$src)>;
}

multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched,
                                 AVX512VLVectorVTInfo VTInfo,
                                 Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in
  defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr, sched>,
           compress_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [Pred, HasVLX] in {
    defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr, sched>,
                compress_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr, sched>,
                compress_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
  }
}

// FIXME: Is there a better scheduler class for VPCOMPRESS?
defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", WriteVarShuffle256,
                                          avx512vl_i32_info>, EVEX, NotMemoryFoldable;
defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", WriteVarShuffle256,
                                          avx512vl_i64_info>, EVEX, VEX_W, NotMemoryFoldable;
defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", WriteVarShuffle256,
                                          avx512vl_f32_info>, EVEX, NotMemoryFoldable;
defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", WriteVarShuffle256,
                                          avx512vl_f64_info>, EVEX, VEX_W, NotMemoryFoldable;

// expand
// Register and load forms of an expand instruction; patterns are attached
// separately in expand_by_vec_width_lowering.
multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
                               string OpcodeStr, X86FoldableSchedWrite sched> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
              (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
              (null_frag)>, AVX5128IBase,
              Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
              (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1",
              (null_frag)>,
            AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>,
            Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// Selection patterns: expanding loads with undef/zero/register passthru, and
// masked/zero-masked register expand. An undef passthru selects the
// zero-masked form.
multiclass expand_by_vec_width_lowering<X86VectorVTInfo _, string Name> {

  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)),
            (!cast<Instruction>(Name#_.ZSuffix#rmkz)
                            _.KRCWM:$mask, addr:$src)>;

  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmkz)
                            _.KRCWM:$mask, addr:$src)>;

  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask,
                                     (_.VT _.RC:$src0))),
            (!cast<Instruction>(Name#_.ZSuffix#rmk)
                            _.RC:$src0, _.KRCWM:$mask, addr:$src)>;

  def : Pat<(X86expand (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#rrk)
                            _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
  def : Pat<(X86expand (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#rrkz)
                            _.KRCWM:$mask, _.RC:$src)>;
}

multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
                               X86FoldableSchedWrite sched,
                               AVX512VLVectorVTInfo VTInfo,
                               Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in
  defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr, sched>,
           expand_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [Pred, HasVLX] in {
    defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr, sched>,
                expand_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr, sched>,
                expand_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
  }
}

// FIXME: Is there a better scheduler class for VPEXPAND?
defm VPEXPANDD  : expand_by_elt_width <0x89, "vpexpandd", WriteVarShuffle256,
                                       avx512vl_i32_info>, EVEX;
defm VPEXPANDQ  : expand_by_elt_width <0x89, "vpexpandq", WriteVarShuffle256,
                                       avx512vl_i64_info>, EVEX, VEX_W;
defm VEXPANDPS  : expand_by_elt_width <0x88, "vexpandps", WriteVarShuffle256,
                                       avx512vl_f32_info>, EVEX;
defm VEXPANDPD  : expand_by_elt_width <0x88, "vexpandpd", WriteVarShuffle256,
                                       avx512vl_f64_info>, EVEX, VEX_W;

//handle instruction  reg_vec1 = op(reg_vec,imm)
//                               op(mem_vec,imm)
//                               op(broadcast(eltVt),imm)
//all instruction created with FROUND_CURRENT
// OpNode is used for the unmasked pattern and MaskOpNode for the masked
// forms (AVX512_maskable_split).
multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr,
                                      SDNode OpNode, SDNode MaskOpNode,
                                      X86FoldableSchedWrite sched,
                                      X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm rri : AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2",
                      (OpNode (_.VT _.RC:$src1), (i32 timm:$src2)),
                      (MaskOpNode (_.VT _.RC:$src1), (i32 timm:$src2))>,
                      Sched<[sched]>;
  defm rmi : AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.MemOp:$src1, i32u8imm:$src2),
                    OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2",
                    (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
                            (i32 timm:$src2)),
                    (MaskOpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
                                (i32 timm:$src2))>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmbi : AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
                    OpcodeStr#_.Suffix, "$src2, ${src1}"#_.BroadcastStr,
                    "${src1}"#_.BroadcastStr#", $src2",
                    (OpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
                            (i32 timm:$src2)),
                    (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
                                (i32 timm:$src2))>, EVEX_B,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

//handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
                                          SDNode OpNode, X86FoldableSchedWrite sched,
                                          X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr#_.Suffix, "$src2, {sae}, $src1",
                      "$src1, {sae}, $src2",
                      (OpNode (_.VT _.RC:$src1),
                              (i32 timm:$src2))>,
                      EVEX_B, Sched<[sched]>;
}

multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
            AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
            SDNode MaskOpNode, SDNode OpNodeSAE, X86SchedWriteWidths sched,
            Predicate prd>{
  let Predicates = [prd] in {
    defm Z    : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
                                           sched.ZMM, _.info512>,
                avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE,
                                               sched.ZMM, _.info512>, EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
                                           sched.XMM, _.info128>, EVEX_V128;
    defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
                                           sched.YMM, _.info256>, EVEX_V256;
  }
}

//handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
//                               op(reg_vec2,mem_vec,imm)
//                               op(reg_vec2,broadcast(eltVt),imm)
//all instruction created with FROUND_CURRENT
multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 timm:$src3))>,
                      Sched<[sched]>;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
                    OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT (bitconvert (_.LdFrag addr:$src2))),
                            (i32 timm:$src3))>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
                    OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
                    "$src1, ${src2}"#_.BroadcastStr#", $src3",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT (_.BroadcastLdFrag addr:$src2)),
                            (i32 timm:$src3))>, EVEX_B,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

//handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
//                               op(reg_vec2,mem_vec,imm)
// Destination and source types may differ (DestInfo vs SrcInfo).
multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo DestInfo,
                              X86VectorVTInfo SrcInfo>{
  let ExeDomain = DestInfo.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
                  (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
                               (SrcInfo.VT SrcInfo.RC:$src2),
                               (i8 timm:$src3)))>,
                  Sched<[sched]>;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
                (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
                             (SrcInfo.VT (bitconvert
                                                (SrcInfo.LdFrag addr:$src2))),
                             (i8 timm:$src3)))>,
                Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

//handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
//                               op(reg_vec2,mem_vec,imm)
//                               op(reg_vec2,broadcast(eltVt),imm)
multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, X86VectorVTInfo _>:
  avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, sched, _, _>{

  let ExeDomain = _.ExeDomain in
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
                "$src1, ${src2}"#_.BroadcastStr#", $src3",
                (OpNode (_.VT _.RC:$src1),
                        (_.VT (_.BroadcastLdFrag addr:$src2)),
                        (i8 timm:$src3))>, EVEX_B,
                Sched<[sched.Folded, sched.ReadAfterFold]>;
}

//handle scalar instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
//                                      op(reg_vec2,mem_scalar,imm)
multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 timm:$src3))>,
                      Sched<[sched]>;
  defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
                    OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                    (OpNode (_.VT _.RC:$src1),
                            (_.ScalarIntMemFrags addr:$src2),
                            (i32 timm:$src3))>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

//handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode, X86FoldableSchedWrite sched,
                                    X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, {sae}, $src2, $src1",
                      "$src1, $src2, {sae}, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 timm:$src3))>,
                      EVEX_B, Sched<[sched]>;
}

//handle scalar instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                    X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
  defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, {sae}, $src2, $src1",
                      "$src1, $src2, {sae}, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 timm:$src3))>,
                      EVEX_B, Sched<[sched]>;
}

multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
            AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
            SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{
  let Predicates = [prd] in {
    defm Z    : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
                avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE, sched.ZMM, _.info512>,
                                  EVEX_V512;

  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
                                  EVEX_V128;
    defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
                                  EVEX_V256;
  }
}

multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
                   X86SchedWriteWidths sched, AVX512VLVectorVTInfo DestInfo,
                   AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> {
  let Predicates = [Pred] in {
    defm Z    : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.ZMM, DestInfo.info512,
                           SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX_4V;
  }
  let Predicates = [Pred, HasVLX] in {
    defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.XMM, DestInfo.info128,
                           SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX_4V;
    defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.YMM, DestInfo.info256,
                           SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX_4V;
  }
}

multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
                                  bits<8> opc, SDNode OpNode, X86SchedWriteWidths sched,
                                  Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in {
    defm Z    : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
                                EVEX_V512;
  }
  let Predicates = [Pred, HasVLX] in {
    defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
                                EVEX_V128;
    defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
                                EVEX_V256;
  }
}

multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
                  X86VectorVTInfo _, bits<8> opc, SDNode OpNode,
                  SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd> {
  let Predicates = [prd] in {
     defm Z : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, sched.XMM, _>,
              avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeSAE, sched.XMM, _>;
  }
}

multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
                    bits<8> opcPs, bits<8> opcPd, SDNode OpNode,
                    SDNode MaskOpNode, SDNode OpNodeSAE,
                    X86SchedWriteWidths sched, Predicate prd>{
  defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info,
                            opcPs, OpNode, MaskOpNode, OpNodeSAE, sched, prd>,
                            EVEX_CD8<32, CD8VF>;
  defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info,
                            opcPd, OpNode, MaskOpNode, OpNodeSAE, sched, prd>,
                            EVEX_CD8<64, CD8VF>, VEX_W;
}

defm VREDUCE   : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
                              X86VReduce, X86VReduce, X86VReduceSAE,
                              SchedWriteFRnd, HasDQI>, AVX512AIi8Base, EVEX;
defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
                              X86any_VRndScale, X86VRndScale, X86VRndScaleSAE,
                              SchedWriteFRnd, HasAVX512>,
                              AVX512AIi8Base, EVEX;
defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
                              X86VGetMant, X86VGetMant, X86VGetMantSAE,
                              SchedWriteFRnd, HasAVX512>, AVX512AIi8Base, EVEX;

defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
                                                0x50, X86VRange, X86VRangeSAE,
                                                SchedWriteFAdd, HasDQI>,
      AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
                                                0x50, X86VRange, X86VRangeSAE,
                                                SchedWriteFAdd, HasDQI>,
      AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;

defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd",
      f64x_info, 0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
      0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;

defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
      0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
      0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;

defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
      0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
      0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;

// 128-bit-lane shuffles (vshuff/i32x4 etc.). The shuffle is matched on
// CastInfo's type and bitconverted to the destination type. EVEX2VEXOvrd
// names the VEX instruction to use when compressing EVEX->VEX.
multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
                                          X86FoldableSchedWrite sched,
                                          X86VectorVTInfo _,
                                          X86VectorVTInfo CastInfo,
                                          string EVEX2VEXOvrd> {
  let ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (_.VT (bitconvert
                         (CastInfo.VT (X86Shuf128 _.RC:$src1, _.RC:$src2,
                                                  (i8 timm:$src3)))))>,
                  Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                (_.VT
                 (bitconvert
                  (CastInfo.VT (X86Shuf128 _.RC:$src1,
                                           (CastInfo.LdFrag addr:$src2),
                                           (i8 timm:$src3)))))>,
                Sched<[sched.Folded, sched.ReadAfterFold]>,
                EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                    OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
                    "$src1, ${src2}"#_.BroadcastStr#", $src3",
                    (_.VT
                     (bitconvert
                      (CastInfo.VT
                       (X86Shuf128 _.RC:$src1,
                                   (_.BroadcastLdFrag addr:$src2),
                                   (i8 timm:$src3)))))>, EVEX_B,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_shuff_packed_128<string OpcodeStr, X86FoldableSchedWrite sched,
                                   AVX512VLVectorVTInfo _,
                                   AVX512VLVectorVTInfo CastInfo, bits<8> opc,
                                   string EVEX2VEXOvrd>{
  let Predicates = [HasAVX512] in
  defm Z : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
                                          _.info512, CastInfo.info512, "">, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in
  defm Z256 : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
                                             _.info256, CastInfo.info256,
                                             EVEX2VEXOvrd>, EVEX_V256;
}

defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", WriteFShuffle256,
      avx512vl_f32_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", WriteFShuffle256,
      avx512vl_f64_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", WriteFShuffle256,
      avx512vl_i32_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", WriteFShuffle256,
      avx512vl_i64_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;

let Predicates = [HasAVX512] in {
// Provide fallback in case the load node that is used in the broadcast
// patterns above is used by additional users, which prevents the pattern
// selection.
// Broadcast a 128-bit subvector to all lanes using a lane shuffle with
// immediate 0 (both sources are the widened input).
def : Pat<(v8f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
          (VSHUFF64X2Zrri (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;
def : Pat<(v8i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
          (VSHUFI64X2Zrri (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;

def : Pat<(v16f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
          (VSHUFF32X4Zrri (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;
def : Pat<(v16i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
          (VSHUFI32X4Zrri (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;

def : Pat<(v32i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
          (VSHUFI32X4Zrri (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;

def : Pat<(v64i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
          (VSHUFI32X4Zrri (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;
}

multiclass avx512_valign<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  // NOTE: EVEX2VEXOverride changed back to Unset for 256-bit at the
  // instantiation of this class.
  let ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (_.VT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 timm:$src3)))>,
                  Sched<[sched]>, EVEX2VEXOverride<"VPALIGNRrri">;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                (_.VT (X86VAlign _.RC:$src1,
                                 (bitconvert (_.LdFrag addr:$src2)),
                                 (i8 timm:$src3)))>,
                Sched<[sched.Folded, sched.ReadAfterFold]>,
                EVEX2VEXOverride<"VPALIGNRrmi">;

  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                   OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
                   "$src1, ${src2}"#_.BroadcastStr#", $src3",
                   (X86VAlign _.RC:$src1,
                              (_.VT (_.BroadcastLdFrag addr:$src2)),
                              (i8 timm:$src3))>, EVEX_B,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_valign_common<string OpcodeStr, X86SchedWriteWidths sched,
                                AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z    : avx512_valign<0x03, OpcodeStr, sched.ZMM, _.info512>,
                                AVX512AIi8Base, EVEX_4V, EVEX_V512;
  }
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_valign<0x03, OpcodeStr, sched.XMM, _.info128>,
                                AVX512AIi8Base, EVEX_4V, EVEX_V128;
    // We can't really override the 256-bit version so change it back to unset.
    let EVEX2VEXOverride = ? in
    defm Z256 : avx512_valign<0x03, OpcodeStr, sched.YMM, _.info256>,
                                AVX512AIi8Base, EVEX_4V, EVEX_V256;
  }
}

defm VALIGND: avx512_valign_common<"valignd", SchedWriteShuffle,
                                   avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VALIGNQ: avx512_valign_common<"valignq", SchedWriteShuffle,
                                   avx512vl_i64_info>, EVEX_CD8<64, CD8VF>,
                                   VEX_W;

defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr",
                                         SchedWriteShuffle, avx512vl_i8_info,
                                         avx512vl_i8_info>, EVEX_CD8<8, CD8VF>;

// Fragments to help convert valignq into masked valignd. Or valignq/valignd
// into vpalignr.
// Each XForm rescales the element-granular shift count by the ratio of the
// element sizes (q->d: x2, q->b: x8, d->b: x4).
def ValignqImm32XForm : SDNodeXForm<timm, [{
  return getI8Imm(N->getZExtValue() * 2, SDLoc(N));
}]>;
def ValignqImm8XForm : SDNodeXForm<timm, [{
  return getI8Imm(N->getZExtValue() * 8, SDLoc(N));
}]>;
def ValigndImm8XForm : SDNodeXForm<timm, [{
  return getI8Imm(N->getZExtValue() * 4, SDLoc(N));
}]>;

multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
                                        X86VectorVTInfo From, X86VectorVTInfo To,
                                        SDNodeXForm ImmXForm> {
  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
                                 (bitconvert
                                  (From.VT (OpNode From.RC:$src1, From.RC:$src2,
                                                   timm:$src3))),
                                 To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask,
                                                  To.RC:$src1, To.RC:$src2,
                                                  (ImmXForm timm:$src3))>;

  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
                                 (bitconvert
                                  (From.VT (OpNode From.RC:$src1, From.RC:$src2,
                                                   timm:$src3))),
                                 To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask,
                                                   To.RC:$src1, To.RC:$src2,
                                                   (ImmXForm timm:$src3))>;

  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
                                 (bitconvert
                                  (From.VT (OpNode From.RC:$src1,
                                                   (From.LdFrag addr:$src2),
                                                   timm:$src3))),
                                 To.RC:$src0)),
(!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask, 10565 To.RC:$src1, addr:$src2, 10566 (ImmXForm timm:$src3))>; 10567 10568 def : Pat<(To.VT (vselect_mask To.KRCWM:$mask, 10569 (bitconvert 10570 (From.VT (OpNode From.RC:$src1, 10571 (From.LdFrag addr:$src2), 10572 timm:$src3))), 10573 To.ImmAllZerosV)), 10574 (!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask, 10575 To.RC:$src1, addr:$src2, 10576 (ImmXForm timm:$src3))>; 10577} 10578 10579multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode, 10580 X86VectorVTInfo From, 10581 X86VectorVTInfo To, 10582 SDNodeXForm ImmXForm> : 10583 avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> { 10584 def : Pat<(From.VT (OpNode From.RC:$src1, 10585 (bitconvert (To.VT (To.BroadcastLdFrag addr:$src2))), 10586 timm:$src3)), 10587 (!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2, 10588 (ImmXForm timm:$src3))>; 10589 10590 def : Pat<(To.VT (vselect_mask To.KRCWM:$mask, 10591 (bitconvert 10592 (From.VT (OpNode From.RC:$src1, 10593 (bitconvert 10594 (To.VT (To.BroadcastLdFrag addr:$src2))), 10595 timm:$src3))), 10596 To.RC:$src0)), 10597 (!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask, 10598 To.RC:$src1, addr:$src2, 10599 (ImmXForm timm:$src3))>; 10600 10601 def : Pat<(To.VT (vselect_mask To.KRCWM:$mask, 10602 (bitconvert 10603 (From.VT (OpNode From.RC:$src1, 10604 (bitconvert 10605 (To.VT (To.BroadcastLdFrag addr:$src2))), 10606 timm:$src3))), 10607 To.ImmAllZerosV)), 10608 (!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask, 10609 To.RC:$src1, addr:$src2, 10610 (ImmXForm timm:$src3))>; 10611} 10612 10613let Predicates = [HasAVX512] in { 10614 // For 512-bit we lower to the widest element type we can. So we only need 10615 // to handle converting valignq to valignd. 
10616 defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ", X86VAlign, v8i64_info, 10617 v16i32_info, ValignqImm32XForm>; 10618} 10619 10620let Predicates = [HasVLX] in { 10621 // For 128-bit we lower to the widest element type we can. So we only need 10622 // to handle converting valignq to valignd. 10623 defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ128", X86VAlign, v2i64x_info, 10624 v4i32x_info, ValignqImm32XForm>; 10625 // For 256-bit we lower to the widest element type we can. So we only need 10626 // to handle converting valignq to valignd. 10627 defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ256", X86VAlign, v4i64x_info, 10628 v8i32x_info, ValignqImm32XForm>; 10629} 10630 10631let Predicates = [HasVLX, HasBWI] in { 10632 // We can turn 128 and 256 bit VALIGND/VALIGNQ into VPALIGNR. 10633 defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v2i64x_info, 10634 v16i8x_info, ValignqImm8XForm>; 10635 defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v4i32x_info, 10636 v16i8x_info, ValigndImm8XForm>; 10637} 10638 10639defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw", 10640 SchedWritePSADBW, avx512vl_i16_info, avx512vl_i8_info>, 10641 EVEX_CD8<8, CD8VF>, NotEVEX2VEXConvertible; 10642 10643multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, 10644 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 10645 let ExeDomain = _.ExeDomain in { 10646 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 10647 (ins _.RC:$src1), OpcodeStr, 10648 "$src1", "$src1", 10649 (_.VT (OpNode (_.VT _.RC:$src1)))>, EVEX, AVX5128IBase, 10650 Sched<[sched]>; 10651 10652 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 10653 (ins _.MemOp:$src1), OpcodeStr, 10654 "$src1", "$src1", 10655 (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1)))))>, 10656 EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>, 10657 Sched<[sched.Folded]>; 10658 } 10659} 10660 10661multiclass avx512_unary_rmb<bits<8> opc, string 
OpcodeStr, SDNode OpNode, 10662 X86FoldableSchedWrite sched, X86VectorVTInfo _> : 10663 avx512_unary_rm<opc, OpcodeStr, OpNode, sched, _> { 10664 defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 10665 (ins _.ScalarMemOp:$src1), OpcodeStr, 10666 "${src1}"#_.BroadcastStr, 10667 "${src1}"#_.BroadcastStr, 10668 (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src1))))>, 10669 EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, 10670 Sched<[sched.Folded]>; 10671} 10672 10673multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode, 10674 X86SchedWriteWidths sched, 10675 AVX512VLVectorVTInfo VTInfo, Predicate prd> { 10676 let Predicates = [prd] in 10677 defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>, 10678 EVEX_V512; 10679 10680 let Predicates = [prd, HasVLX] in { 10681 defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>, 10682 EVEX_V256; 10683 defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>, 10684 EVEX_V128; 10685 } 10686} 10687 10688multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode, 10689 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo, 10690 Predicate prd> { 10691 let Predicates = [prd] in 10692 defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>, 10693 EVEX_V512; 10694 10695 let Predicates = [prd, HasVLX] in { 10696 defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>, 10697 EVEX_V256; 10698 defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>, 10699 EVEX_V128; 10700 } 10701} 10702 10703multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr, 10704 SDNode OpNode, X86SchedWriteWidths sched, 10705 Predicate prd> { 10706 defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, sched, 10707 avx512vl_i64_info, prd>, VEX_W; 10708 defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, sched, 10709 avx512vl_i32_info, 
prd>; 10710} 10711 10712multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr, 10713 SDNode OpNode, X86SchedWriteWidths sched, 10714 Predicate prd> { 10715 defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, sched, 10716 avx512vl_i16_info, prd>, VEX_WIG; 10717 defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, sched, 10718 avx512vl_i8_info, prd>, VEX_WIG; 10719} 10720 10721multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w, 10722 bits<8> opc_d, bits<8> opc_q, 10723 string OpcodeStr, SDNode OpNode, 10724 X86SchedWriteWidths sched> { 10725 defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, sched, 10726 HasAVX512>, 10727 avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, sched, 10728 HasBWI>; 10729} 10730 10731defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs, 10732 SchedWriteVecALU>; 10733 10734// VPABS: Use 512bit version to implement 128/256 bit in case NoVLX. 10735let Predicates = [HasAVX512, NoVLX] in { 10736 def : Pat<(v4i64 (abs VR256X:$src)), 10737 (EXTRACT_SUBREG 10738 (VPABSQZrr 10739 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)), 10740 sub_ymm)>; 10741 def : Pat<(v2i64 (abs VR128X:$src)), 10742 (EXTRACT_SUBREG 10743 (VPABSQZrr 10744 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)), 10745 sub_xmm)>; 10746} 10747 10748// Use 512bit version to implement 128/256 bit. 
// Lower a 128/256-bit unary op to the 512-bit instruction when VLX is not
// available: widen the operand into an undef ZMM (INSERT_SUBREG), run the
// Z-width instruction, then extract the original subregister.
multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
                                 AVX512VLVectorVTInfo _, Predicate prd> {
  let Predicates = [prd, NoVLX] in {
    def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1))),
              (EXTRACT_SUBREG
                (!cast<Instruction>(InstrStr # "Zrr")
                  (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
                                 _.info256.RC:$src1,
                                 _.info256.SubRegIdx)),
                _.info256.SubRegIdx)>;

    def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1))),
              (EXTRACT_SUBREG
                (!cast<Instruction>(InstrStr # "Zrr")
                  (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
                                 _.info128.RC:$src1,
                                 _.info128.SubRegIdx)),
                _.info128.SubRegIdx)>;
  }
}

defm VPLZCNT    : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz,
                                        SchedWriteVecIMul, HasCDI>;

// FIXME: Is there a better scheduler class for VPCONFLICT?
defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict,
                                        SchedWriteVecALU, HasCDI>;

// VPLZCNT: Use 512bit version to implement 128/256 bit in case NoVLX.
defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>;
defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>;

//===---------------------------------------------------------------------===//
// Counts number of ones - VPOPCNTD and VPOPCNTQ
//===---------------------------------------------------------------------===//

// FIXME: Is there a better scheduler class for VPOPCNTD/VPOPCNTQ?
defm VPOPCNT : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop,
                                     SchedWriteVecALU, HasVPOPCNTDQ>;

// Use 512bit version to implement 128/256 bit in case NoVLX.
defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info, HasVPOPCNTDQ>;
defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info, HasVPOPCNTDQ>;

//===---------------------------------------------------------------------===//
// Replicate Single FP - MOVSHDUP and MOVSLDUP
//===---------------------------------------------------------------------===//

multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86SchedWriteWidths sched> {
  defm NAME: avx512_unary_rm_vl<opc, OpcodeStr, OpNode, sched,
                                avx512vl_f32_info, HasAVX512>, XS;
}

defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup,
                                  SchedWriteFShuffle>;
defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup,
                                  SchedWriteFShuffle>;

//===----------------------------------------------------------------------===//
// AVX-512 - MOVDDUP
//===----------------------------------------------------------------------===//

// The 128-bit form is special: it broadcasts a single f64, so the memory
// form uses a scalar load (CD8VH compressed displacement).
multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr,
                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins _.RC:$src), OpcodeStr, "$src", "$src",
                   (_.VT (X86VBroadcast (_.VT _.RC:$src)))>, EVEX,
                   Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                 (ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
                 (_.VT (_.BroadcastLdFrag addr:$src))>,
                 EVEX, EVEX_CD8<_.EltSize, CD8VH>,
                 Sched<[sched.Folded]>;
  }
}

multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo> {
  defm Z : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.ZMM,
                           VTInfo.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.YMM,
                                VTInfo.info256>, EVEX_V256;
    defm Z128 : avx512_movddup_128<opc, OpcodeStr, sched.XMM,
                                   VTInfo.info128>, EVEX_V128;
  }
}

multiclass avx512_movddup<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          X86SchedWriteWidths sched> {
  defm NAME: avx512_movddup_common<opc, OpcodeStr, OpNode, sched,
                                   avx512vl_f64_info>, XD, VEX_W;
}

defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup, SchedWriteFShuffle>;

// Scalar f64 broadcast via the 128-bit MOVDDUP (plain, merge- and
// zero-masked forms).
let Predicates = [HasVLX] in {
def : Pat<(v2f64 (X86VBroadcast f64:$src)),
          (VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;

def : Pat<(vselect_mask (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
                        (v2f64 VR128X:$src0)),
          (VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
                           (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
def : Pat<(vselect_mask (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
                        immAllZerosV),
          (VMOVDDUPZ128rrkz VK2WM:$mask, (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - Unpack Instructions
//===----------------------------------------------------------------------===//

let Uses = []<Register>, mayRaiseFPException = 0 in {
defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, X86Unpckh, HasAVX512,
                                 SchedWriteFShuffleSizes, 0, 1>;
defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, X86Unpckl, HasAVX512,
                                 SchedWriteFShuffleSizes>;
}

defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
                                       SchedWriteShuffle, HasBWI>;
defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh,
                                       SchedWriteShuffle, HasBWI>;
defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl,
                                       SchedWriteShuffle, HasBWI>;
defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh,
                                       SchedWriteShuffle, HasBWI>;

defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl,
                                       SchedWriteShuffle, HasAVX512>;
defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh,
                                       SchedWriteShuffle, HasAVX512>;
defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl,
                                        SchedWriteShuffle, HasAVX512>;
defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh,
                                        SchedWriteShuffle, HasAVX512>;

//===----------------------------------------------------------------------===//
// AVX-512 - Extract & Insert Integer Instructions
//===----------------------------------------------------------------------===//

// Extract-to-memory form shared by VPEXTRB/VPEXTRW.
multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   X86VectorVTInfo _> {
  def mr : AVX512Ii8<opc, MRMDestMem, (outs),
                     (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
                     OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     [(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), imm:$src2))),
                             addr:$dst)]>,
                     EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecExtractSt]>;
}

multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst),
                       (ins _.RC:$src1, u8imm:$src2),
                       OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set GR32orGR64:$dst,
                             (X86pextrb (_.VT _.RC:$src1), imm:$src2))]>,
                       EVEX, TAPD, Sched<[WriteVecExtract]>;

    defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD;
  }
}

multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
                       (ins _.RC:$src1, u8imm:$src2),
                       OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set GR32orGR64:$dst,
                             (X86pextrw (_.VT _.RC:$src1), imm:$src2))]>,
                       EVEX, PD, Sched<[WriteVecExtract]>;

    // Alternate (0x15) encoding, disassembly-only.
    let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
    def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
                           (ins _.RC:$src1, u8imm:$src2),
                           OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
                           EVEX, TAPD, FoldGenData<NAME#rr>,
                           Sched<[WriteVecExtract]>;

    defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD;
  }
}

multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
                                 RegisterClass GRC> {
  let Predicates = [HasDQI] in {
    def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst),
                       (ins _.RC:$src1, u8imm:$src2),
                       OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set GRC:$dst,
                           (extractelt (_.VT _.RC:$src1), imm:$src2))]>,
                       EVEX, TAPD, Sched<[WriteVecExtract]>;

    def mr : AVX512Ii8<0x16, MRMDestMem, (outs),
                       (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
                       OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(store (extractelt (_.VT _.RC:$src1),
                                           imm:$src2),addr:$dst)]>,
                       EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD,
                       Sched<[WriteVecExtractSt]>;
  }
}

defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>, VEX_WIG;
defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>, VEX_WIG;
defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, VEX_W;

// Insert-from-memory form shared by the insert-element multiclasses.
multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86VectorVTInfo _, PatFrag LdFrag> {
  def rm : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst),
      (ins _.RC:$src1,  _.ScalarMemOp:$src2, u8imm:$src3),
      OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set _.RC:$dst,
          (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), imm:$src3)))]>,
      EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
}

multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86VectorVTInfo _, PatFrag LdFrag> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
        (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
        OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
        [(set _.RC:$dst,
            (OpNode _.RC:$src1, GR32orGR64:$src2, imm:$src3))]>, EVEX_4V,
        Sched<[WriteVecInsert]>;

    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag>;
  }
}

multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
                                X86VectorVTInfo _, RegisterClass GRC> {
  let Predicates = [HasDQI] in {
    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
        (ins _.RC:$src1, GRC:$src2, u8imm:$src3),
        OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
        [(set _.RC:$dst,
            (_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>,
        EVEX_4V, TAPD, Sched<[WriteVecInsert]>;

    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
                                    _.ScalarLdFrag>, TAPD;
  }
}

defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info,
                                     extloadi8>, TAPD, VEX_WIG;
defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info,
                                     extloadi16>, PD, VEX_WIG;
defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, VEX_W;

//===----------------------------------------------------------------------===//
// VSHUFPS - VSHUFPD Operations
//===----------------------------------------------------------------------===//

multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_I,
                        AVX512VLVectorVTInfo VTInfo_FP>{
  defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp,
                                    SchedWriteFShuffle>,
                                    EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
                                    AVX512AIi8Base, EVEX_4V;
}

defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_i32_info, avx512vl_f32_info>, PS;
defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_i64_info, avx512vl_f64_info>, PD, VEX_W;

//===----------------------------------------------------------------------===//
// AVX-512 - Byte shift Left/Right
//===----------------------------------------------------------------------===//

multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
                               Format MRMm, string OpcodeStr,
                               X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  def ri : AVX512<opc, MRMr,
             (outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 timm:$src2))))]>,
             Sched<[sched]>;
  def mi : AVX512<opc, MRMm,
           (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
           [(set _.RC:$dst,(_.VT (OpNode
                                  (_.VT (bitconvert (_.LdFrag addr:$src1))),
                                  (i8 timm:$src2))))]>,
           Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
                                   Format MRMm, string OpcodeStr,
                                   X86SchedWriteWidths sched, Predicate prd>{
  let Predicates = [prd] in
    defm Z : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                 sched.ZMM, v64i8_info>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                    sched.YMM, v32i8x_info>, EVEX_V256;
    defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                    sched.XMM, v16i8x_info>, EVEX_V128;
  }
}
defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq",
                                       SchedWriteShuffle, HasBWI>,
                                       AVX512PDIi8Base, EVEX_4V, VEX_WIG;
defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq",
                                       SchedWriteShuffle, HasBWI>,
                                       AVX512PDIi8Base, EVEX_4V, VEX_WIG;

// PSADBW has distinct source (byte) and destination (qword) layouts, hence
// the separate _dst/_src VT infos.
multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
                                string OpcodeStr, X86FoldableSchedWrite sched,
                                X86VectorVTInfo _dst, X86VectorVTInfo _src> {
  let isCommutable = 1 in
  def rr : AVX512BI<opc, MRMSrcReg,
             (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.RC:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _dst.RC:$dst,(_dst.VT
                                 (OpNode (_src.VT _src.RC:$src1),
                                         (_src.VT _src.RC:$src2))))]>,
             Sched<[sched]>;
  def rm : AVX512BI<opc, MRMSrcMem,
             (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.MemOp:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _dst.RC:$dst,(_dst.VT
                                 (OpNode (_src.VT _src.RC:$src1),
                                         (_src.VT (bitconvert
                                                   (_src.LdFrag addr:$src2))))))]>,
             Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
                                    string OpcodeStr, X86SchedWriteWidths sched,
                                    Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.ZMM,
                                  v8i64_info, v64i8_info>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.YMM,
                                     v4i64x_info, v32i8x_info>, EVEX_V256;
    defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.XMM,
                                     v2i64x_info, v16i8x_info>, EVEX_V128;
  }
}
11098defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw", 11099 SchedWritePSADBW, HasBWI>, EVEX_4V, VEX_WIG; 11100 11101// Transforms to swizzle an immediate to enable better matching when 11102// memory operand isn't in the right place. 11103def VPTERNLOG321_imm8 : SDNodeXForm<timm, [{ 11104 // Convert a VPTERNLOG immediate by swapping operand 0 and operand 2. 11105 uint8_t Imm = N->getZExtValue(); 11106 // Swap bits 1/4 and 3/6. 11107 uint8_t NewImm = Imm & 0xa5; 11108 if (Imm & 0x02) NewImm |= 0x10; 11109 if (Imm & 0x10) NewImm |= 0x02; 11110 if (Imm & 0x08) NewImm |= 0x40; 11111 if (Imm & 0x40) NewImm |= 0x08; 11112 return getI8Imm(NewImm, SDLoc(N)); 11113}]>; 11114def VPTERNLOG213_imm8 : SDNodeXForm<timm, [{ 11115 // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2. 11116 uint8_t Imm = N->getZExtValue(); 11117 // Swap bits 2/4 and 3/5. 11118 uint8_t NewImm = Imm & 0xc3; 11119 if (Imm & 0x04) NewImm |= 0x10; 11120 if (Imm & 0x10) NewImm |= 0x04; 11121 if (Imm & 0x08) NewImm |= 0x20; 11122 if (Imm & 0x20) NewImm |= 0x08; 11123 return getI8Imm(NewImm, SDLoc(N)); 11124}]>; 11125def VPTERNLOG132_imm8 : SDNodeXForm<timm, [{ 11126 // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2. 11127 uint8_t Imm = N->getZExtValue(); 11128 // Swap bits 1/2 and 5/6. 11129 uint8_t NewImm = Imm & 0x99; 11130 if (Imm & 0x02) NewImm |= 0x04; 11131 if (Imm & 0x04) NewImm |= 0x02; 11132 if (Imm & 0x20) NewImm |= 0x40; 11133 if (Imm & 0x40) NewImm |= 0x20; 11134 return getI8Imm(NewImm, SDLoc(N)); 11135}]>; 11136def VPTERNLOG231_imm8 : SDNodeXForm<timm, [{ 11137 // Convert a VPTERNLOG immediate by moving operand 1 to the end. 
11138 uint8_t Imm = N->getZExtValue(); 11139 // Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5 11140 uint8_t NewImm = Imm & 0x81; 11141 if (Imm & 0x02) NewImm |= 0x04; 11142 if (Imm & 0x04) NewImm |= 0x10; 11143 if (Imm & 0x08) NewImm |= 0x40; 11144 if (Imm & 0x10) NewImm |= 0x02; 11145 if (Imm & 0x20) NewImm |= 0x08; 11146 if (Imm & 0x40) NewImm |= 0x20; 11147 return getI8Imm(NewImm, SDLoc(N)); 11148}]>; 11149def VPTERNLOG312_imm8 : SDNodeXForm<timm, [{ 11150 // Convert a VPTERNLOG immediate by moving operand 2 to the beginning. 11151 uint8_t Imm = N->getZExtValue(); 11152 // Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3 11153 uint8_t NewImm = Imm & 0x81; 11154 if (Imm & 0x02) NewImm |= 0x10; 11155 if (Imm & 0x04) NewImm |= 0x02; 11156 if (Imm & 0x08) NewImm |= 0x20; 11157 if (Imm & 0x10) NewImm |= 0x04; 11158 if (Imm & 0x20) NewImm |= 0x40; 11159 if (Imm & 0x40) NewImm |= 0x08; 11160 return getI8Imm(NewImm, SDLoc(N)); 11161}]>; 11162 11163multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode, 11164 X86FoldableSchedWrite sched, X86VectorVTInfo _, 11165 string Name>{ 11166 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in { 11167 defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), 11168 (ins _.RC:$src2, _.RC:$src3, u8imm:$src4), 11169 OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4", 11170 (OpNode (_.VT _.RC:$src1), 11171 (_.VT _.RC:$src2), 11172 (_.VT _.RC:$src3), 11173 (i8 timm:$src4)), 1, 1>, 11174 AVX512AIi8Base, EVEX_4V, Sched<[sched]>; 11175 defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 11176 (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4), 11177 OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4", 11178 (OpNode (_.VT _.RC:$src1), 11179 (_.VT _.RC:$src2), 11180 (_.VT (bitconvert (_.LdFrag addr:$src3))), 11181 (i8 timm:$src4)), 1, 0>, 11182 AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, 11183 Sched<[sched.Folded, sched.ReadAfterFold]>; 11184 defm rmbi : AVX512_maskable_3src<opc, 
MRMSrcMem, _, (outs _.RC:$dst), 11185 (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4), 11186 OpcodeStr, "$src4, ${src3}"#_.BroadcastStr#", $src2", 11187 "$src2, ${src3}"#_.BroadcastStr#", $src4", 11188 (OpNode (_.VT _.RC:$src1), 11189 (_.VT _.RC:$src2), 11190 (_.VT (_.BroadcastLdFrag addr:$src3)), 11191 (i8 timm:$src4)), 1, 0>, EVEX_B, 11192 AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, 11193 Sched<[sched.Folded, sched.ReadAfterFold]>; 11194 }// Constraints = "$src1 = $dst" 11195 11196 // Additional patterns for matching passthru operand in other positions. 11197 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11198 (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 timm:$src4)), 11199 _.RC:$src1)), 11200 (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask, 11201 _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 timm:$src4))>; 11202 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11203 (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 timm:$src4)), 11204 _.RC:$src1)), 11205 (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask, 11206 _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 timm:$src4))>; 11207 11208 // Additional patterns for matching loads in other positions. 11209 def : Pat<(_.VT (OpNode (bitconvert (_.LdFrag addr:$src3)), 11210 _.RC:$src2, _.RC:$src1, (i8 timm:$src4))), 11211 (!cast<Instruction>(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2, 11212 addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>; 11213 def : Pat<(_.VT (OpNode _.RC:$src1, 11214 (bitconvert (_.LdFrag addr:$src3)), 11215 _.RC:$src2, (i8 timm:$src4))), 11216 (!cast<Instruction>(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2, 11217 addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>; 11218 11219 // Additional patterns for matching zero masking with loads in other 11220 // positions. 
11221 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11222 (OpNode (bitconvert (_.LdFrag addr:$src3)), 11223 _.RC:$src2, _.RC:$src1, (i8 timm:$src4)), 11224 _.ImmAllZerosV)), 11225 (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask, 11226 _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>; 11227 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11228 (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)), 11229 _.RC:$src2, (i8 timm:$src4)), 11230 _.ImmAllZerosV)), 11231 (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask, 11232 _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>; 11233 11234 // Additional patterns for matching masked loads with different 11235 // operand orders. 11236 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11237 (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)), 11238 _.RC:$src2, (i8 timm:$src4)), 11239 _.RC:$src1)), 11240 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, 11241 _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>; 11242 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11243 (OpNode (bitconvert (_.LdFrag addr:$src3)), 11244 _.RC:$src2, _.RC:$src1, (i8 timm:$src4)), 11245 _.RC:$src1)), 11246 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, 11247 _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>; 11248 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11249 (OpNode _.RC:$src2, _.RC:$src1, 11250 (bitconvert (_.LdFrag addr:$src3)), (i8 timm:$src4)), 11251 _.RC:$src1)), 11252 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, 11253 _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>; 11254 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11255 (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)), 11256 _.RC:$src1, (i8 timm:$src4)), 11257 _.RC:$src1)), 11258 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, 11259 _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>; 11260 def : Pat<(_.VT (vselect_mask 
_.KRCWM:$mask, 11261 (OpNode (bitconvert (_.LdFrag addr:$src3)), 11262 _.RC:$src1, _.RC:$src2, (i8 timm:$src4)), 11263 _.RC:$src1)), 11264 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, 11265 _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>; 11266 11267 // Additional patterns for matching broadcasts in other positions. 11268 def : Pat<(_.VT (OpNode (_.BroadcastLdFrag addr:$src3), 11269 _.RC:$src2, _.RC:$src1, (i8 timm:$src4))), 11270 (!cast<Instruction>(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2, 11271 addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>; 11272 def : Pat<(_.VT (OpNode _.RC:$src1, 11273 (_.BroadcastLdFrag addr:$src3), 11274 _.RC:$src2, (i8 timm:$src4))), 11275 (!cast<Instruction>(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2, 11276 addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>; 11277 11278 // Additional patterns for matching zero masking with broadcasts in other 11279 // positions. 11280 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11281 (OpNode (_.BroadcastLdFrag addr:$src3), 11282 _.RC:$src2, _.RC:$src1, (i8 timm:$src4)), 11283 _.ImmAllZerosV)), 11284 (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1, 11285 _.KRCWM:$mask, _.RC:$src2, addr:$src3, 11286 (VPTERNLOG321_imm8 timm:$src4))>; 11287 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11288 (OpNode _.RC:$src1, 11289 (_.BroadcastLdFrag addr:$src3), 11290 _.RC:$src2, (i8 timm:$src4)), 11291 _.ImmAllZerosV)), 11292 (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1, 11293 _.KRCWM:$mask, _.RC:$src2, addr:$src3, 11294 (VPTERNLOG132_imm8 timm:$src4))>; 11295 11296 // Additional patterns for matching masked broadcasts with different 11297 // operand orders. 
// Masked broadcast forms whose operand order differs from the canonical
// (src1, src2, src3): each is folded onto the rmbik encoding by remapping
// the ternlog immediate to the matching operand permutation.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src1, (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src2, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1,
                    (_.BroadcastLdFrag addr:$src3), (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src2, (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src1, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;
}

// Instantiates VPTERNLOG at 512 bits unconditionally and at 128/256 bits
// when VLX is also available.
multiclass avx512_common_ternlog<string OpcodeStr, X86SchedWriteWidths sched,
                                 AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.ZMM,
                          _.info512, NAME>, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in {
  defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.XMM,
                             _.info128, NAME>, EVEX_V128;
  defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.YMM,
                             _.info256, NAME>, EVEX_V256;
  }
}

defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SchedWriteVecALU,
                                        avx512vl_i32_info>;
defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU,
                                        avx512vl_i64_info>, VEX_W;

// Patterns to use VPTERNLOG for vXi16/vXi8 vectors. There are no byte/word
// element forms of the instruction, so the element-size-agnostic logic op is
// selected onto the D/Q forms; commuted operand orders are handled by
// remapping the immediate.
let Predicates = [HasVLX] in {
  def : Pat<(v16i8 (X86vpternlog VR128X:$src1, VR128X:$src2, VR128X:$src3,
                                 (i8 timm:$src4))),
            (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3,
                               timm:$src4)>;
  def : Pat<(v16i8 (X86vpternlog VR128X:$src1, VR128X:$src2,
                                 (loadv16i8 addr:$src3), (i8 timm:$src4))),
            (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
                               timm:$src4)>;
  def : Pat<(v16i8 (X86vpternlog (loadv16i8 addr:$src3), VR128X:$src2,
                                 VR128X:$src1, (i8 timm:$src4))),
            (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
                               (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(v16i8 (X86vpternlog VR128X:$src1, (loadv16i8 addr:$src3),
                                 VR128X:$src2, (i8 timm:$src4))),
            (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
                               (VPTERNLOG132_imm8 timm:$src4))>;

  def : Pat<(v16i8 (X86vpternlog VR128X:$src1, VR128X:$src2,
                                 (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
                                 (i8 timm:$src4))),
            (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
                                timm:$src4)>;
  def : Pat<(v16i8 (X86vpternlog (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
                                 VR128X:$src2, VR128X:$src1, (i8 timm:$src4))),
            (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
                                (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(v16i8 (X86vpternlog VR128X:$src1,
                                 (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
                                 VR128X:$src2, (i8 timm:$src4))),
            (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
                                (VPTERNLOG132_imm8 timm:$src4))>;

  def : Pat<(v16i8 (X86vpternlog VR128X:$src1, VR128X:$src2,
                                 (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
                                 (i8 timm:$src4))),
            (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
                                timm:$src4)>;
  def : Pat<(v16i8 (X86vpternlog (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
                                 VR128X:$src2, VR128X:$src1, (i8 timm:$src4))),
            (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
                                (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(v16i8 (X86vpternlog VR128X:$src1,
                                 (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
                                 VR128X:$src2, (i8 timm:$src4))),
            (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
                                (VPTERNLOG132_imm8 timm:$src4))>;

  def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2, VR128X:$src3,
                                 (i8 timm:$src4))),
            (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3,
                               timm:$src4)>;
  def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2,
                                 (loadv8i16 addr:$src3), (i8 timm:$src4))),
            (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
                               timm:$src4)>;
  def : Pat<(v8i16 (X86vpternlog (loadv8i16 addr:$src3), VR128X:$src2,
                                 VR128X:$src1, (i8 timm:$src4))),
            (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
                               (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(v8i16 (X86vpternlog VR128X:$src1, (loadv8i16 addr:$src3),
                                 VR128X:$src2, (i8 timm:$src4))),
            (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
                               (VPTERNLOG132_imm8 timm:$src4))>;

  def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2,
                                 (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
                                 (i8 timm:$src4))),
            (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
                                timm:$src4)>;
  def : Pat<(v8i16 (X86vpternlog (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
                                 VR128X:$src2, VR128X:$src1, (i8 timm:$src4))),
            (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
                                (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(v8i16 (X86vpternlog VR128X:$src1,
                                 (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
                                 VR128X:$src2, (i8 timm:$src4))),
            (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
                                (VPTERNLOG132_imm8 timm:$src4))>;

  def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2,
                                 (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
                                 (i8 timm:$src4))),
            (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
                                timm:$src4)>;
  def : Pat<(v8i16 (X86vpternlog (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
                                 VR128X:$src2, VR128X:$src1, (i8 timm:$src4))),
            (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
                                (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(v8i16 (X86vpternlog VR128X:$src1,
                                 (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
                                 VR128X:$src2, (i8 timm:$src4))),
            (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
                                (VPTERNLOG132_imm8 timm:$src4))>;

  def : Pat<(v4i32 (X86vpternlog VR128X:$src1, VR128X:$src2,
                                 (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
                                 (i8 timm:$src4))),
            (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
                                timm:$src4)>;
  def : Pat<(v4i32 (X86vpternlog (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
                                 VR128X:$src2, VR128X:$src1, (i8 timm:$src4))),
            (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
                                (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(v4i32 (X86vpternlog VR128X:$src1,
                                 (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
                                 VR128X:$src2, (i8 timm:$src4))),
            (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
                                (VPTERNLOG132_imm8 timm:$src4))>;

  def : Pat<(v2i64 (X86vpternlog VR128X:$src1, VR128X:$src2,
                                 (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
                                 (i8 timm:$src4))),
            (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
                                timm:$src4)>;
  def : Pat<(v2i64 (X86vpternlog (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
                                 VR128X:$src2, VR128X:$src1, (i8 timm:$src4))),
            (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
                                (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(v2i64 (X86vpternlog VR128X:$src1,
                                 (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
                                 VR128X:$src2, (i8 timm:$src4))),
            (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
                                (VPTERNLOG132_imm8 timm:$src4))>;

  def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2, VR256X:$src3,
                                 (i8 timm:$src4))),
            (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3,
                               timm:$src4)>;
  def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2,
                                 (loadv32i8 addr:$src3), (i8 timm:$src4))),
            (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
                               timm:$src4)>;
  def : Pat<(v32i8 (X86vpternlog (loadv32i8 addr:$src3), VR256X:$src2,
                                 VR256X:$src1, (i8 timm:$src4))),
            (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
                               (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(v32i8 (X86vpternlog VR256X:$src1, (loadv32i8 addr:$src3),
                                 VR256X:$src2, (i8 timm:$src4))),
            (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
                               (VPTERNLOG132_imm8 timm:$src4))>;

  def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2,
                                 (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
                                 (i8 timm:$src4))),
            (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
                                timm:$src4)>;
  def : Pat<(v32i8 (X86vpternlog (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
                                 VR256X:$src2, VR256X:$src1, (i8 timm:$src4))),
            (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
                                (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(v32i8 (X86vpternlog VR256X:$src1,
                                 (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
                                 VR256X:$src2, (i8 timm:$src4))),
            (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
                                (VPTERNLOG132_imm8 timm:$src4))>;

  def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2,
                                 (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
                                 (i8 timm:$src4))),
            (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
                                timm:$src4)>;
  def : Pat<(v32i8 (X86vpternlog (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
                                 VR256X:$src2, VR256X:$src1, (i8 timm:$src4))),
            (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
                                (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(v32i8 (X86vpternlog VR256X:$src1,
                                 (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
                                 VR256X:$src2, (i8 timm:$src4))),
            (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
                                (VPTERNLOG132_imm8 timm:$src4))>;

  def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2, VR256X:$src3,
                                  (i8 timm:$src4))),
            (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3,
                               timm:$src4)>;
  def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2,
                                  (loadv16i16 addr:$src3), (i8 timm:$src4))),
            (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
                               timm:$src4)>;
  def : Pat<(v16i16 (X86vpternlog (loadv16i16 addr:$src3), VR256X:$src2,
                                  VR256X:$src1, (i8 timm:$src4))),
            (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
                               (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(v16i16 (X86vpternlog VR256X:$src1, (loadv16i16 addr:$src3),
                                  VR256X:$src2, (i8 timm:$src4))),
            (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
                               (VPTERNLOG132_imm8 timm:$src4))>;

  def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2,
                                  (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
                                  (i8 timm:$src4))),
            (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
                                timm:$src4)>;
  def : Pat<(v16i16 (X86vpternlog (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
                                  VR256X:$src2, VR256X:$src1, (i8 timm:$src4))),
            (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
                                (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(v16i16 (X86vpternlog VR256X:$src1,
                                  (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
                                  VR256X:$src2, (i8 timm:$src4))),
            (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
                                (VPTERNLOG132_imm8 timm:$src4))>;

  def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2,
                                  (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
                                  (i8 timm:$src4))),
            (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
                                timm:$src4)>;
  def : Pat<(v16i16 (X86vpternlog (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
                                  VR256X:$src2, VR256X:$src1, (i8 timm:$src4))),
            (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
                                (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(v16i16 (X86vpternlog VR256X:$src1,
                                  (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
                                  VR256X:$src2, (i8 timm:$src4))),
            (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
                                (VPTERNLOG132_imm8 timm:$src4))>;

  def : Pat<(v8i32 (X86vpternlog VR256X:$src1, VR256X:$src2,
                                 (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
                                 (i8 timm:$src4))),
            (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
                                timm:$src4)>;
  def : Pat<(v8i32 (X86vpternlog (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
                                 VR256X:$src2, VR256X:$src1, (i8 timm:$src4))),
            (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
                                (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(v8i32 (X86vpternlog VR256X:$src1,
                                 (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
                                 VR256X:$src2, (i8 timm:$src4))),
            (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
                                (VPTERNLOG132_imm8 timm:$src4))>;

  def :
Pat<(v4i64 (X86vpternlog VR256X:$src1, VR256X:$src2,
                                 (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
                                 (i8 timm:$src4))),
            (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
                                timm:$src4)>;
  def : Pat<(v4i64 (X86vpternlog (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
                                 VR256X:$src2, VR256X:$src1, (i8 timm:$src4))),
            (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
                                (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(v4i64 (X86vpternlog VR256X:$src1,
                                 (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
                                 VR256X:$src2, (i8 timm:$src4))),
            (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
                                (VPTERNLOG132_imm8 timm:$src4))>;
}

// 512-bit VPTERNLOG patterns for vXi8/vXi16 (no byte/word form exists, so the
// D/Q encodings are reused), plus cross-element-size broadcast forms.
// NOTE(review): the original carried a second, byte-identical copy of the
// v32i16 / 32-bit-broadcast group (three redundant patterns); the duplicate
// has been removed — each (type, broadcast-width) group now appears once,
// matching the 128/256-bit sections above.
let Predicates = [HasAVX512] in {
  def : Pat<(v64i8 (X86vpternlog VR512:$src1, VR512:$src2, VR512:$src3,
                                 (i8 timm:$src4))),
            (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3,
                            timm:$src4)>;
  def : Pat<(v64i8 (X86vpternlog VR512:$src1, VR512:$src2,
                                 (loadv64i8 addr:$src3), (i8 timm:$src4))),
            (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
                            timm:$src4)>;
  def : Pat<(v64i8 (X86vpternlog (loadv64i8 addr:$src3), VR512:$src2,
                                 VR512:$src1, (i8 timm:$src4))),
            (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
                            (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(v64i8 (X86vpternlog VR512:$src1, (loadv64i8 addr:$src3),
                                 VR512:$src2, (i8 timm:$src4))),
            (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
                            (VPTERNLOG132_imm8 timm:$src4))>;

  def : Pat<(v64i8 (X86vpternlog VR512:$src1, VR512:$src2,
                                 (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
                                 (i8 timm:$src4))),
            (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
                             timm:$src4)>;
  def : Pat<(v64i8 (X86vpternlog (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
                                 VR512:$src2, VR512:$src1, (i8 timm:$src4))),
            (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
                             (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(v64i8 (X86vpternlog VR512:$src1,
                                 (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
                                 VR512:$src2, (i8 timm:$src4))),
            (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
                             (VPTERNLOG132_imm8 timm:$src4))>;

  def : Pat<(v64i8 (X86vpternlog VR512:$src1, VR512:$src2,
                                 (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
                                 (i8 timm:$src4))),
            (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
                             timm:$src4)>;
  def : Pat<(v64i8 (X86vpternlog (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
                                 VR512:$src2, VR512:$src1, (i8 timm:$src4))),
            (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
                             (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(v64i8 (X86vpternlog VR512:$src1,
                                 (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
                                 VR512:$src2, (i8 timm:$src4))),
            (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
                             (VPTERNLOG132_imm8 timm:$src4))>;

  def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2, VR512:$src3,
                                  (i8 timm:$src4))),
            (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3,
                            timm:$src4)>;
  def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2,
                                  (loadv32i16 addr:$src3), (i8 timm:$src4))),
            (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
                            timm:$src4)>;
  def : Pat<(v32i16 (X86vpternlog (loadv32i16 addr:$src3), VR512:$src2,
                                  VR512:$src1, (i8 timm:$src4))),
            (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
                            (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(v32i16 (X86vpternlog VR512:$src1, (loadv32i16 addr:$src3),
                                  VR512:$src2, (i8 timm:$src4))),
            (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
                            (VPTERNLOG132_imm8 timm:$src4))>;

  def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2,
                                  (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
                                  (i8 timm:$src4))),
            (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
                             timm:$src4)>;
  def : Pat<(v32i16 (X86vpternlog (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
                                  VR512:$src2, VR512:$src1, (i8 timm:$src4))),
            (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
                             (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(v32i16 (X86vpternlog VR512:$src1,
                                  (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
                                  VR512:$src2, (i8 timm:$src4))),
            (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
                             (VPTERNLOG132_imm8 timm:$src4))>;

  def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2,
                                  (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
                                  (i8 timm:$src4))),
            (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
                             timm:$src4)>;
  def : Pat<(v32i16 (X86vpternlog (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
                                  VR512:$src2, VR512:$src1, (i8 timm:$src4))),
            (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
                             (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(v32i16 (X86vpternlog VR512:$src1,
                                  (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
                                  VR512:$src2, (i8 timm:$src4))),
            (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
                             (VPTERNLOG132_imm8 timm:$src4))>;

  def : Pat<(v16i32 (X86vpternlog VR512:$src1, VR512:$src2,
                                  (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
                                  (i8 timm:$src4))),
            (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
                             timm:$src4)>;
  def : Pat<(v16i32 (X86vpternlog (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
                                  VR512:$src2, VR512:$src1, (i8 timm:$src4))),
            (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
                             (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(v16i32 (X86vpternlog VR512:$src1,
                                  (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
                                  VR512:$src2, (i8 timm:$src4))),
            (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
                             (VPTERNLOG132_imm8 timm:$src4))>;

  def : Pat<(v8i64 (X86vpternlog VR512:$src1, VR512:$src2,
                                 (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
                                 (i8 timm:$src4))),
            (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
                             timm:$src4)>;
  def : Pat<(v8i64 (X86vpternlog (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
                                 VR512:$src2, VR512:$src1, (i8 timm:$src4))),
            (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
                             (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(v8i64 (X86vpternlog VR512:$src1,
                                 (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
                                 VR512:$src2, (i8 timm:$src4))),
            (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
                             (VPTERNLOG132_imm8 timm:$src4))>;
}

// Patterns to implement vnot using vpternlog instead of creating all ones
// using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen
// so that the result is only dependent on src0. But we use the same source
// for all operands to prevent a false dependency.
// TODO: We should maybe have a more generalized algorithm for folding to
// vpternlog.
let Predicates = [HasAVX512] in {
  // vnot at 512 bits: vpternlog with imm 15 computes NOT of operand 0.
  def : Pat<(xor VR512:$src, (v64i8 immAllOnesV)),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
  def : Pat<(xor VR512:$src, (v32i16 immAllOnesV)),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
  def : Pat<(xor VR512:$src, (v16i32 immAllOnesV)),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
  def : Pat<(xor VR512:$src, (v8i64 immAllOnesV)),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
}

let Predicates = [HasAVX512, NoVLX] in {
  // Without VLX only the ZMM form exists: widen through the zmm register,
  // apply the 512-bit vpternlog, and extract the low subvector again.
  def : Pat<(xor VR128X:$src, (v16i8 immAllOnesV)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (i8 15)), sub_xmm)>;
  def : Pat<(xor VR128X:$src, (v8i16 immAllOnesV)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (i8 15)), sub_xmm)>;
  def : Pat<(xor VR128X:$src, (v4i32 immAllOnesV)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (i8 15)), sub_xmm)>;
  def : Pat<(xor VR128X:$src, (v2i64 immAllOnesV)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (i8 15)), sub_xmm)>;

  def : Pat<(xor VR256X:$src, (v32i8 immAllOnesV)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (i8 15)), sub_ymm)>;
  def : Pat<(xor VR256X:$src, (v16i16 immAllOnesV)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (i8 15)), sub_ymm)>;
  def : Pat<(xor VR256X:$src, (v8i32 immAllOnesV)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (i8 15)), sub_ymm)>;
  def : Pat<(xor VR256X:$src, (v4i64 immAllOnesV)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (i8 15)), sub_ymm)>;
}

let Predicates = [HasVLX] in {
  // With VLX the 128/256-bit forms are available directly.
  def : Pat<(xor VR128X:$src, (v16i8 immAllOnesV)),
            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
  def : Pat<(xor VR128X:$src, (v8i16 immAllOnesV)),
            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
  def : Pat<(xor VR128X:$src, (v4i32 immAllOnesV)),
            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
  def : Pat<(xor VR128X:$src, (v2i64 immAllOnesV)),
            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;

  def : Pat<(xor VR256X:$src, (v32i8 immAllOnesV)),
            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
  def : Pat<(xor VR256X:$src, (v16i16 immAllOnesV)),
            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
  def : Pat<(xor VR256X:$src, (v8i32 immAllOnesV)),
            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
  def : Pat<(xor VR256X:$src, (v4i64 immAllOnesV)),
            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - FixupImm
//===----------------------------------------------------------------------===//

multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                  X86VectorVTInfo TblVT>{
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
      Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                      OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (X86VFixupimm (_.VT _.RC:$src1),
                                    (_.VT _.RC:$src2),
                                    (TblVT.VT _.RC:$src3),
                                    (i32 timm:$src4))>, Sched<[sched]>;
  defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
                      OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (X86VFixupimm (_.VT _.RC:$src1),
                                    (_.VT _.RC:$src2),
                                    (TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))),
                                    (i32 timm:$src4))>,
                      Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
                      OpcodeStr#_.Suffix, "$src4, ${src3}"#_.BroadcastStr#", $src2",
                      "$src2, ${src3}"#_.BroadcastStr#", $src4",
                      (X86VFixupimm (_.VT _.RC:$src1),
                                    (_.VT _.RC:$src2),
                                    (TblVT.VT (TblVT.BroadcastLdFrag addr:$src3)),
                                    (i32 timm:$src4))>,
EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  } // Constraints = "$src1 = $dst"
}

// Packed fixupimm plus the {sae} register-register form (512-bit only).
multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
                                      X86FoldableSchedWrite sched,
                                      X86VectorVTInfo _, X86VectorVTInfo TblVT>
  : avx512_fixupimm_packed<opc, OpcodeStr, sched, _, TblVT> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
  defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                      OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2",
                      "$src2, $src3, {sae}, $src4",
                      (X86VFixupimmSAE (_.VT _.RC:$src1),
                                       (_.VT _.RC:$src2),
                                       (TblVT.VT _.RC:$src3),
                                       (i32 timm:$src4))>,
                      EVEX_B, Sched<[sched]>;
  }
}

multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                  X86VectorVTInfo _src3VT> {
  let Constraints = "$src1 = $dst" , Predicates = [HasAVX512],
      ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                      OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (X86VFixupimms (_.VT _.RC:$src1),
                                     (_.VT _.RC:$src2),
                                     (_src3VT.VT _src3VT.RC:$src3),
                                     (i32 timm:$src4))>, Sched<[sched]>, SIMD_EXC;
  let Uses = [MXCSR] in
  defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                      OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2",
                      "$src2, $src3, {sae}, $src4",
                      (X86VFixupimmSAEs (_.VT _.RC:$src1),
                                        (_.VT _.RC:$src2),
                                        (_src3VT.VT _src3VT.RC:$src3),
                                        (i32 timm:$src4))>,
                      EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
                      OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (X86VFixupimms (_.VT _.RC:$src1),
                                     (_.VT _.RC:$src2),
                                     (_src3VT.VT (scalar_to_vector
                                               (_src3VT.ScalarLdFrag addr:$src3))),
                                     (i32 timm:$src4))>,
                      Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  }
}

multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched,
                                      AVX512VLVectorVTInfo _Vec,
                                      AVX512VLVectorVTInfo _Tbl> {
  let Predicates = [HasAVX512] in
  defm Z    : avx512_fixupimm_packed_sae<0x54, "vfixupimm", sched.ZMM,
                                _Vec.info512, _Tbl.info512>, AVX512AIi8Base,
                                EVEX_4V, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.XMM,
                                _Vec.info128, _Tbl.info128>, AVX512AIi8Base,
                                EVEX_4V, EVEX_V128;
    defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.YMM,
                                _Vec.info256, _Tbl.info256>, AVX512AIi8Base,
                                EVEX_4V, EVEX_V256;
  }
}

defm VFIXUPIMMSSZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
                                           SchedWriteFAdd.Scl, f32x_info, v4i32x_info>,
                          AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
defm VFIXUPIMMSDZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
                                           SchedWriteFAdd.Scl, f64x_info, v2i64x_info>,
                          AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f32_info,
                         avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f64_info,
                         avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;

// Patterns used to select SSE scalar fp arithmetic instructions from
// either:
//
// (1) a scalar fp operation followed by a blend
//
// The effect is that the backend no longer emits unnecessary vector
// insert instructions immediately after SSE scalar fp instructions
// like addss or mulss.
//
// For example, given the following code:
//   __m128 foo(__m128 A, __m128 B) {
//     A[0] += B[0];
//     return A;
//   }
//
// Previously we generated:
//   addss %xmm0, %xmm1
//   movss %xmm1, %xmm0
//
// We now generate:
//   addss %xmm1, %xmm0
//
// (2) a vector packed single/double fp operation followed by a vector insert
//
// The effect is that the backend converts the packed fp instruction
// followed by a vector insert into a single SSE scalar fp instruction.
//
// For example, given the following code:
//   __m128 foo(__m128 A, __m128 B) {
//     __m128 C = A + B;
//     return (__m128) {c[0], a[1], a[2], a[3]};
//   }
//
// Previously we generated:
//   addps %xmm0, %xmm1
//   movss %xmm1, %xmm0
//
// We now generate:
//   addss %xmm1, %xmm0

// TODO: Some canonicalization in lowering would simplify the number of
// patterns we have to try to match.
// Selection patterns mapping (movss/movsd of a scalar fp op) onto the AVX-512
// *_Int instruction forms, with unmasked, merge-masked ($src0 passthrough) and
// zero-masked (ZeroFP passthrough) variants, each in reg and mem flavors.
multiclass AVX512_scalar_math_fp_patterns<SDNode Op, SDNode MaskedOp,
                                          string OpcPrefix, SDNode MoveNode,
                                          X86VectorVTInfo _, PatLeaf ZeroFP> {
  let Predicates = [HasAVX512] in {
    // extracted scalar math op with insert via movss
    def : Pat<(MoveNode
               (_.VT VR128X:$dst),
               (_.VT (scalar_to_vector
                      (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
                          _.FRC:$src)))),
              (!cast<Instruction>("V"#OpcPrefix#"Zrr_Int") _.VT:$dst,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src, VR128X)))>;
    def : Pat<(MoveNode
               (_.VT VR128X:$dst),
               (_.VT (scalar_to_vector
                      (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
                          (_.ScalarLdFrag addr:$src))))),
              (!cast<Instruction>("V"#OpcPrefix#"Zrm_Int") _.VT:$dst, addr:$src)>;

    // extracted masked scalar math op with insert via movss
    def : Pat<(MoveNode (_.VT VR128X:$src1),
               (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                                 (MaskedOp (_.EltVT
                                            (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                           _.FRC:$src2),
                                 _.FRC:$src0))),
              (!cast<Instruction>("V"#OpcPrefix#"Zrr_Intk")
               (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
               VK1WM:$mask, _.VT:$src1,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
    def : Pat<(MoveNode (_.VT VR128X:$src1),
               (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                                 (MaskedOp (_.EltVT
                                            (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                           (_.ScalarLdFrag addr:$src2)),
                                 _.FRC:$src0))),
              (!cast<Instruction>("V"#OpcPrefix#"Zrm_Intk")
               (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
               VK1WM:$mask, _.VT:$src1, addr:$src2)>;

    // extracted masked scalar math op with insert via movss, zero-masked
    def : Pat<(MoveNode (_.VT VR128X:$src1),
               (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                                 (MaskedOp (_.EltVT
                                            (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                           _.FRC:$src2), (_.EltVT ZeroFP)))),
              (!cast<I>("V"#OpcPrefix#"Zrr_Intkz")
               VK1WM:$mask, _.VT:$src1,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
    def : Pat<(MoveNode (_.VT VR128X:$src1),
               (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                                 (MaskedOp (_.EltVT
                                            (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                           (_.ScalarLdFrag addr:$src2)), (_.EltVT ZeroFP)))),
              (!cast<I>("V"#OpcPrefix#"Zrm_Intkz") VK1WM:$mask, _.VT:$src1, addr:$src2)>;
  }
}

defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSS", X86Movss, v4f32x_info, fp32imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSS", X86Movss, v4f32x_info, fp32imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSS", X86Movss, v4f32x_info, fp32imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSS", X86Movss, v4f32x_info, fp32imm0>;

defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSD", X86Movsd, v2f64x_info, fp64imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSD", X86Movsd, v2f64x_info, fp64imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSD", X86Movsd, v2f64x_info, fp64imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSD", X86Movsd, v2f64x_info, fp64imm0>;

// Same idea for unary scalar math ops (e.g. sqrt): select the Zr_Int form when
// the op feeds a movss/movsd insert back into element 0.
multiclass AVX512_scalar_unary_math_patterns<SDNode OpNode, string OpcPrefix,
                                             SDNode Move, X86VectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    def : Pat<(_.VT (Move _.VT:$dst,
                     (scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))),
              (!cast<Instruction>("V"#OpcPrefix#"Zr_Int") _.VT:$dst, _.VT:$src)>;
  }
}

defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSS", X86Movss, v4f32x_info>;
defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSD", X86Movsd, v2f64x_info>;

//===----------------------------------------------------------------------===//
// AES instructions
//===----------------------------------------------------------------------===//

// EVEX-encoded AES: 128/256-bit forms need VAES+VLX, the 512-bit form VAES+AVX512.
multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> {
  let Predicates = [HasVLX, HasVAES] in {
    defm Z128 : AESI_binop_rm_int<Op, OpStr,
                                  !cast<Intrinsic>(IntPrefix),
                                  loadv2i64, 0, VR128X, i128mem>,
                EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V128, VEX_WIG;
    defm Z256 : AESI_binop_rm_int<Op, OpStr,
                                  !cast<Intrinsic>(IntPrefix#"_256"),
                                  loadv4i64, 0, VR256X, i256mem>,
                EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512, HasVAES] in
    defm Z    : AESI_binop_rm_int<Op, OpStr,
                                  !cast<Intrinsic>(IntPrefix#"_512"),
                                  loadv8i64, 0, VR512, i512mem>,
                EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_WIG;
}

defm VAESENC     : avx512_vaes<0xDC, "vaesenc", "int_x86_aesni_aesenc">;
defm VAESENCLAST : avx512_vaes<0xDD, "vaesenclast", "int_x86_aesni_aesenclast">;
defm VAESDEC     : avx512_vaes<0xDE, "vaesdec", "int_x86_aesni_aesdec">;
defm VAESDECLAST : avx512_vaes<0xDF, "vaesdeclast", "int_x86_aesni_aesdeclast">;

//===----------------------------------------------------------------------===//
// PCLMUL instructions - Carry less multiplication
//===----------------------------------------------------------------------===//

let Predicates = [HasAVX512, HasVPCLMULQDQ] in
defm VPCLMULQDQZ : vpclmulqdq<VR512, i512mem, loadv8i64, int_x86_pclmulqdq_512>,
                   EVEX_4V, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_WIG;

let Predicates = [HasVLX, HasVPCLMULQDQ] in {
defm VPCLMULQDQZ128 : vpclmulqdq<VR128X, i128mem, loadv2i64, int_x86_pclmulqdq>,
                      EVEX_4V, EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_WIG;

defm VPCLMULQDQZ256 : vpclmulqdq<VR256X, i256mem, loadv4i64,
                                 int_x86_pclmulqdq_256>, EVEX_4V, EVEX_V256,
                      EVEX_CD8<64, CD8VF>, VEX_WIG;
}

// Aliases
defm : vpclmulqdq_aliases<"VPCLMULQDQZ", VR512, i512mem>;
defm : vpclmulqdq_aliases<"VPCLMULQDQZ128", VR128X, i128mem>;
defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>;

//===----------------------------------------------------------------------===//
// VBMI2
//===----------------------------------------------------------------------===//

// Variable-count concat-shift (VPSHLDV/VPSHRDV): reg/reg and reg/mem forms.
multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
  let Constraints = "$src1 = $dst",
      ExeDomain = VTI.ExeDomain in {
    defm r: AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
                (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
                "$src3, $src2", "$src2, $src3",
                (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>,
            AVX512FMA3Base, Sched<[sched]>;
    defm m: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
                "$src3, $src2", "$src2, $src3",
                (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
                                (VTI.VT (VTI.LdFrag addr:$src3))))>,
            AVX512FMA3Base,
            Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Adds the broadcast-memory (embedded broadcast) form on top of the above.
multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
                               X86FoldableSchedWrite sched, X86VectorVTInfo VTI>
    : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched, VTI> {
  let Constraints = "$src1 = $dst",
      ExeDomain = VTI.ExeDomain in
    defm mb: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr,
                "${src3}"#VTI.BroadcastStr#", $src2",
                "$src2, ${src3}"#VTI.BroadcastStr,
                (OpNode VTI.RC:$src1, VTI.RC:$src2,
                        (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
             AVX512FMA3Base, EVEX_B,
             Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// 128/256/512-bit instantiations (no broadcast form, used for the word element size).
multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode,
                                     X86SchedWriteWidths sched,
                                     AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasVBMI2] in
    defm Z : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
             EVEX_V512;
  let Predicates = [HasVBMI2, HasVLX] in {
    defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
                EVEX_V256;
    defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
                EVEX_V128;
  }
}

// 128/256/512-bit instantiations including the broadcast form (dword/qword).
multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode,
                                      X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasVBMI2] in
    defm Z : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
             EVEX_V512;
  let Predicates = [HasVBMI2, HasVLX] in {
    defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
                EVEX_V256;
    defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
                EVEX_V128;
  }
}
multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix,
                           SDNode OpNode, X86SchedWriteWidths sched> {
  defm W : VBMI2_shift_var_rm_common<wOp, Prefix#"w", OpNode, sched,
                                     avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>;
  defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix#"d", OpNode, sched,
                                      avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
  defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix#"q", OpNode, sched,
                                      avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
}

multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix,
                           SDNode OpNode, X86SchedWriteWidths sched> {
  defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix#"w", sched,
                                     avx512vl_i16_info, avx512vl_i16_info, HasVBMI2>,
           VEX_W, EVEX_CD8<16, CD8VF>;
  defm D : avx512_common_3Op_imm8<Prefix#"d", avx512vl_i32_info, dqOp,
                                  OpNode, sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
  defm Q : avx512_common_3Op_imm8<Prefix#"q", avx512vl_i64_info, dqOp, OpNode,
                                  sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
}

// Concat & Shift
defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SchedWriteVecIMul>;
defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SchedWriteVecIMul>;
defm VPSHLD  : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SchedWriteVecIMul>;
defm VPSHRD  : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SchedWriteVecIMul>;

// Compress
defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", WriteVarShuffle256,
                                         avx512vl_i8_info, HasVBMI2>, EVEX,
                                         NotMemoryFoldable;
defm VPCOMPRESSW : compress_by_elt_width <0x63, "vpcompressw", WriteVarShuffle256,
                                          avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W,
                                          NotMemoryFoldable;
// Expand
defm VPEXPANDB : expand_by_elt_width <0x62, "vpexpandb", WriteVarShuffle256,
                                      avx512vl_i8_info, HasVBMI2>, EVEX;
defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", WriteVarShuffle256,
                                      avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W;

//===----------------------------------------------------------------------===//
// VNNI
//===----------------------------------------------------------------------===//

// VNNI dot-product accumulate: reg/reg, reg/mem and broadcast-mem forms.
let Constraints = "$src1 = $dst" in
multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
                    X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
                    bit IsCommutable> {
  let ExeDomain = VTI.ExeDomain in {
  defm r  :   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
                                   (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
                                   "$src3, $src2", "$src2, $src3",
                                   (VTI.VT (OpNode VTI.RC:$src1,
                                            VTI.RC:$src2, VTI.RC:$src3)),
                                   IsCommutable, IsCommutable>,
                                   EVEX_4V, T8PD, Sched<[sched]>;
  defm m  :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                                   (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
                                   "$src3, $src2", "$src2, $src3",
                                   (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
                                            (VTI.VT (VTI.LdFrag addr:$src3))))>,
                                   EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD,
                                   Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm mb :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                                   (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
                                   OpStr, "${src3}"#VTI.BroadcastStr#", $src2",
                                   "$src2, ${src3}"#VTI.BroadcastStr,
                                   (OpNode VTI.RC:$src1, VTI.RC:$src2,
                                    (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
                                   EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B,
                                   T8PD, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode,
                       X86SchedWriteWidths sched, bit IsCommutable> {
  let Predicates = [HasVNNI] in
  defm Z      :   VNNI_rmb<Op, OpStr, OpNode, sched.ZMM, v16i32_info,
                           IsCommutable>, EVEX_V512;
  let Predicates = [HasVNNI, HasVLX] in {
    defm Z256 :   VNNI_rmb<Op, OpStr, OpNode, sched.YMM, v8i32x_info,
                           IsCommutable>, EVEX_V256;
    defm Z128 :   VNNI_rmb<Op, OpStr, OpNode, sched.XMM, v4i32x_info,
                           IsCommutable>, EVEX_V128;
  }
}

// FIXME: Is there a better scheduler class for VPDP?
defm VPDPBUSD   : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SchedWriteVecIMul, 0>;
defm VPDPBUSDS  : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SchedWriteVecIMul, 0>;
defm VPDPWSSD   : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SchedWriteVecIMul, 1>;
defm VPDPWSSDS  : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SchedWriteVecIMul, 1>;

// Single-use guard so the add+vpmaddwd fold below doesn't duplicate work.
def X86vpmaddwd_su : PatFrag<(ops node:$lhs, node:$rhs),
                             (X86vpmaddwd node:$lhs, node:$rhs), [{
  return N->hasOneUse();
}]>;

// Patterns to match VPDPWSSD from existing instructions/intrinsics.
12290let Predicates = [HasVNNI] in { 12291 def : Pat<(v16i32 (add VR512:$src1, 12292 (X86vpmaddwd_su VR512:$src2, VR512:$src3))), 12293 (VPDPWSSDZr VR512:$src1, VR512:$src2, VR512:$src3)>; 12294 def : Pat<(v16i32 (add VR512:$src1, 12295 (X86vpmaddwd_su VR512:$src2, (load addr:$src3)))), 12296 (VPDPWSSDZm VR512:$src1, VR512:$src2, addr:$src3)>; 12297} 12298let Predicates = [HasVNNI,HasVLX] in { 12299 def : Pat<(v8i32 (add VR256X:$src1, 12300 (X86vpmaddwd_su VR256X:$src2, VR256X:$src3))), 12301 (VPDPWSSDZ256r VR256X:$src1, VR256X:$src2, VR256X:$src3)>; 12302 def : Pat<(v8i32 (add VR256X:$src1, 12303 (X86vpmaddwd_su VR256X:$src2, (load addr:$src3)))), 12304 (VPDPWSSDZ256m VR256X:$src1, VR256X:$src2, addr:$src3)>; 12305 def : Pat<(v4i32 (add VR128X:$src1, 12306 (X86vpmaddwd_su VR128X:$src2, VR128X:$src3))), 12307 (VPDPWSSDZ128r VR128X:$src1, VR128X:$src2, VR128X:$src3)>; 12308 def : Pat<(v4i32 (add VR128X:$src1, 12309 (X86vpmaddwd_su VR128X:$src2, (load addr:$src3)))), 12310 (VPDPWSSDZ128m VR128X:$src1, VR128X:$src2, addr:$src3)>; 12311} 12312 12313//===----------------------------------------------------------------------===// 12314// Bit Algorithms 12315//===----------------------------------------------------------------------===// 12316 12317// FIXME: Is there a better scheduler class for VPOPCNTB/VPOPCNTW? 
12318defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, SchedWriteVecALU, 12319 avx512vl_i8_info, HasBITALG>; 12320defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SchedWriteVecALU, 12321 avx512vl_i16_info, HasBITALG>, VEX_W; 12322 12323defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>; 12324defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>; 12325 12326def X86Vpshufbitqmb_su : PatFrag<(ops node:$src1, node:$src2), 12327 (X86Vpshufbitqmb node:$src1, node:$src2), [{ 12328 return N->hasOneUse(); 12329}]>; 12330 12331multiclass VPSHUFBITQMB_rm<X86FoldableSchedWrite sched, X86VectorVTInfo VTI> { 12332 defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst), 12333 (ins VTI.RC:$src1, VTI.RC:$src2), 12334 "vpshufbitqmb", 12335 "$src2, $src1", "$src1, $src2", 12336 (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1), 12337 (VTI.VT VTI.RC:$src2)), 12338 (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1), 12339 (VTI.VT VTI.RC:$src2))>, EVEX_4V, T8PD, 12340 Sched<[sched]>; 12341 defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst), 12342 (ins VTI.RC:$src1, VTI.MemOp:$src2), 12343 "vpshufbitqmb", 12344 "$src2, $src1", "$src1, $src2", 12345 (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1), 12346 (VTI.VT (VTI.LdFrag addr:$src2))), 12347 (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1), 12348 (VTI.VT (VTI.LdFrag addr:$src2)))>, 12349 EVEX_4V, EVEX_CD8<8, CD8VF>, T8PD, 12350 Sched<[sched.Folded, sched.ReadAfterFold]>; 12351} 12352 12353multiclass VPSHUFBITQMB_common<X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> { 12354 let Predicates = [HasBITALG] in 12355 defm Z : VPSHUFBITQMB_rm<sched.ZMM, VTI.info512>, EVEX_V512; 12356 let Predicates = [HasBITALG, HasVLX] in { 12357 defm Z256 : VPSHUFBITQMB_rm<sched.YMM, VTI.info256>, EVEX_V256; 12358 defm Z128 : VPSHUFBITQMB_rm<sched.XMM, VTI.info128>, EVEX_V128; 12359 } 12360} 12361 12362// FIXME: Is there a better scheduler class for VPSHUFBITQMB? 
defm VPSHUFBITQMB : VPSHUFBITQMB_common<SchedWriteVecIMul, avx512vl_i8_info>;

//===----------------------------------------------------------------------===//
// GFNI
//===----------------------------------------------------------------------===//

multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
                                   X86SchedWriteWidths sched> {
  let Predicates = [HasGFNI, HasAVX512, HasBWI] in
  defm Z      : avx512_binop_rm<Op, OpStr, OpNode, v64i8_info, sched.ZMM, 1>,
                EVEX_V512;
  let Predicates = [HasGFNI, HasVLX, HasBWI] in {
    defm Z256 : avx512_binop_rm<Op, OpStr, OpNode, v32i8x_info, sched.YMM, 1>,
                EVEX_V256;
    defm Z128 : avx512_binop_rm<Op, OpStr, OpNode, v16i8x_info, sched.XMM, 1>,
                EVEX_V128;
  }
}

defm VGF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb,
                                          SchedWriteVecALU>,
                                          EVEX_CD8<8, CD8VF>, T8PD;

// Adds a qword-broadcast memory form (the matrix operand broadcasts as i64)
// on top of the normal reg/reg, reg/mem, imm forms from avx512_3Op_rm_imm8.
multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
                                      X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
                                      X86VectorVTInfo BcstVTI>
           : avx512_3Op_rm_imm8<Op, OpStr, OpNode, sched, VTI, VTI> {
  let ExeDomain = VTI.ExeDomain in
  defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                (ins VTI.RC:$src1, VTI.ScalarMemOp:$src2, u8imm:$src3),
                OpStr, "$src3, ${src2}"#BcstVTI.BroadcastStr#", $src1",
                "$src1, ${src2}"#BcstVTI.BroadcastStr#", $src3",
                (OpNode (VTI.VT VTI.RC:$src1),
                 (bitconvert (BcstVTI.VT (X86VBroadcastld64 addr:$src2))),
                 (i8 timm:$src3))>, EVEX_B,
                 Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
                                     X86SchedWriteWidths sched> {
  let Predicates = [HasGFNI, HasAVX512, HasBWI] in
  defm Z      : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.ZMM,
                                           v64i8_info, v8i64_info>, EVEX_V512;
  let Predicates = [HasGFNI, HasVLX, HasBWI] in {
    defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.YMM,
                                           v32i8x_info, v4i64x_info>, EVEX_V256;
    defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.XMM,
                                           v16i8x_info, v2i64x_info>, EVEX_V128;
  }
}

defm VGF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb",
                                                   X86GF2P8affineinvqb, SchedWriteVecIMul>,
                                                   EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
defm VGF2P8AFFINEQB    : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb",
                                                   X86GF2P8affineqb, SchedWriteVecIMul>,
                                                   EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;


//===----------------------------------------------------------------------===//
// AVX5124FMAPS
//===----------------------------------------------------------------------===//

// Asm-only (no ISel patterns): these consume a 4-register group plus memory.
let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedSingle,
    Constraints = "$src1 = $dst", Uses = [MXCSR], mayRaiseFPException = 1 in {
defm V4FMADDPSrm : AVX512_maskable_3src_in_asm<0x9A, MRMSrcMem, v16f32_info,
                    (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                    "v4fmaddps", "$src3, $src2", "$src2, $src3",
                    []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                    Sched<[SchedWriteFMA.ZMM.Folded]>;

defm V4FNMADDPSrm : AVX512_maskable_3src_in_asm<0xAA, MRMSrcMem, v16f32_info,
                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                     "v4fnmaddps", "$src3, $src2", "$src2, $src3",
                     []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                     Sched<[SchedWriteFMA.ZMM.Folded]>;

defm V4FMADDSSrm : AVX512_maskable_3src_in_asm<0x9B, MRMSrcMem, f32x_info,
                    (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
                    "v4fmaddss", "$src3, $src2", "$src2, $src3",
                    []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
                    Sched<[SchedWriteFMA.Scl.Folded]>;

defm V4FNMADDSSrm : AVX512_maskable_3src_in_asm<0xAB, MRMSrcMem, f32x_info,
                     (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
                     "v4fnmaddss", "$src3, $src2", "$src2, $src3",
                     []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
                     Sched<[SchedWriteFMA.Scl.Folded]>;
}

//===----------------------------------------------------------------------===//
// AVX5124VNNIW
//===----------------------------------------------------------------------===//

// Asm-only (no ISel patterns), same 4-register-group scheme as above.
let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedInt,
    Constraints = "$src1 = $dst" in {
defm VP4DPWSSDrm : AVX512_maskable_3src_in_asm<0x52, MRMSrcMem, v16i32_info,
                    (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                     "vp4dpwssd", "$src3, $src2", "$src2, $src3",
                    []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                    Sched<[SchedWriteFMA.ZMM.Folded]>;

defm VP4DPWSSDSrm : AVX512_maskable_3src_in_asm<0x53, MRMSrcMem, v16i32_info,
                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                     "vp4dpwssds", "$src3, $src2", "$src2, $src3",
                     []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                     Sched<[SchedWriteFMA.ZMM.Folded]>;
}

// Pseudos for spilling/reloading a VK16 mask-register pair.
let hasSideEffects = 0 in {
  let mayStore = 1, SchedRW = [WriteFStoreX] in
  def MASKPAIR16STORE : PseudoI<(outs), (ins anymem:$dst, VK16PAIR:$src), []>;
  let mayLoad = 1, SchedRW = [WriteFLoadX] in
  def MASKPAIR16LOAD : PseudoI<(outs VK16PAIR:$dst), (ins anymem:$src), []>;
}

//===----------------------------------------------------------------------===//
// VP2INTERSECT
//===----------------------------------------------------------------------===//

// VP2INTERSECT writes a mask-register *pair* (KRPC); reg, mem and broadcast forms.
multiclass avx512_vp2intersect_modes<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  def rr : I<0x68, MRMSrcReg,
                  (outs _.KRPC:$dst),
                  (ins _.RC:$src1, _.RC:$src2),
                  !strconcat("vp2intersect", _.Suffix,
                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set _.KRPC:$dst, (X86vp2intersect
                                        _.RC:$src1, (_.VT _.RC:$src2)))]>,
                  EVEX_4V, T8XD, Sched<[sched]>;

  def rm : I<0x68, MRMSrcMem,
                  (outs _.KRPC:$dst),
                  (ins _.RC:$src1, _.MemOp:$src2),
                  !strconcat("vp2intersect", _.Suffix,
                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set _.KRPC:$dst, (X86vp2intersect
                                        _.RC:$src1, (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>,
                  EVEX_4V, T8XD, EVEX_CD8<_.EltSize, CD8VF>,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;

  def rmb : I<0x68, MRMSrcMem,
                  (outs _.KRPC:$dst),
                  (ins _.RC:$src1, _.ScalarMemOp:$src2),
                  !strconcat("vp2intersect", _.Suffix, "\t{${src2}", _.BroadcastStr,
                             ", $src1, $dst|$dst, $src1, ${src2}", _.BroadcastStr ,"}"),
                  [(set _.KRPC:$dst, (X86vp2intersect
                                        _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))]>,
                  EVEX_4V, T8XD, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_vp2intersect<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512, HasVP2INTERSECT] in
  defm Z : avx512_vp2intersect_modes<sched.ZMM, _.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVP2INTERSECT, HasVLX] in {
    defm Z256 : avx512_vp2intersect_modes<sched.YMM, _.info256>, EVEX_V256;
    defm Z128 : avx512_vp2intersect_modes<sched.XMM, _.info128>, EVEX_V128;
  }
}

defm VP2INTERSECTD : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i32_info>;
defm VP2INTERSECTQ : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i64_info>, VEX_W;

// Two-source binop where source and destination element types differ
// (used below for vcvtne2ps2bf16: f32 sources, i16 destination).
multiclass avx512_binop_all2<bits<8> opc, string OpcodeStr,
                             X86SchedWriteWidths sched,
                             AVX512VLVectorVTInfo _SrcVTInfo,
                             AVX512VLVectorVTInfo _DstVTInfo,
                             SDNode OpNode, Predicate prd,
                             bit IsCommutable = 0> {
  let Predicates = [prd] in
    defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
                                   _SrcVTInfo.info512, _DstVTInfo.info512,
                                   _SrcVTInfo.info512, IsCommutable>,
                                   EVEX_V512, EVEX_CD8<32, CD8VF>;
  let Predicates = [HasVLX, prd] in {
    defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
                                      _SrcVTInfo.info256, _DstVTInfo.info256,
                                      _SrcVTInfo.info256, IsCommutable>,
                                      EVEX_V256, EVEX_CD8<32, CD8VF>;
    defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
                                      _SrcVTInfo.info128, _DstVTInfo.info128,
                                      _SrcVTInfo.info128, IsCommutable>,
                                      EVEX_V128, EVEX_CD8<32, CD8VF>;
  }
}

let ExeDomain = SSEPackedSingle in
defm VCVTNE2PS2BF16 : avx512_binop_all2<0x72, "vcvtne2ps2bf16",
                                        SchedWriteCvtPD2PS, //FIXME: Should be SchedWriteCvtPS2BF
                                        avx512vl_f32_info, avx512vl_i16_info,
                                        X86cvtne2ps2bf16, HasBF16, 0>, T8XD;

// Truncate Float to BFloat16
multiclass avx512_cvtps2bf16<bits<8> opc, string OpcodeStr,
                             X86SchedWriteWidths sched> {
  let ExeDomain = SSEPackedSingle in {
    let Predicates = [HasBF16], Uses = []<Register>, mayRaiseFPException = 0 in {
      defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i16x_info, v16f32_info,
                              X86cvtneps2bf16, X86cvtneps2bf16, sched.ZMM>, EVEX_V512;
    }
    let Predicates = [HasBF16, HasVLX] in {
      let Uses = []<Register>, mayRaiseFPException = 0 in {
        // 128-bit form uses null_frag; its selection is handled by the
        // explicit patterns following this multiclass's instantiation.
        defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v4f32x_info,
                                   null_frag, null_frag, sched.XMM, "{1to4}", "{x}", f128mem,
                                   VK4WM>, EVEX_V128;
        defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v8f32x_info,
                                   X86cvtneps2bf16, X86cvtneps2bf16,
                                   sched.YMM, "{1to8}", "{y}">, EVEX_V256;
      }
    } // Predicates = [HasBF16, HasVLX]
  } // ExeDomain = SSEPackedSingle

  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
                   VR128X:$src), 0>;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst,
                   f128mem:$src), 0, "intel">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
                   VR256X:$src), 0>;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst,
                   f256mem:$src), 0, "intel">;
}

defm VCVTNEPS2BF16 : avx512_cvtps2bf16<0x72, "vcvtneps2bf16",
                                       SchedWriteCvtPD2PS>, T8XS,
                                       EVEX_CD8<32, CD8VF>;

let Predicates = [HasBF16, HasVLX] in {
  // Special patterns to allow use of X86mcvtneps2bf16 for masking. Instruction
  // patterns have been disabled with null_frag.
  def : Pat<(v8i16 (X86cvtneps2bf16 (v4f32 VR128X:$src))),
            (VCVTNEPS2BF16Z128rr VR128X:$src)>;
  def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), (v8i16 VR128X:$src0),
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src)>;
  def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), v8i16x_info.ImmAllZerosV,
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rrkz VK4WM:$mask, VR128X:$src)>;

  def : Pat<(v8i16 (X86cvtneps2bf16 (loadv4f32 addr:$src))),
            (VCVTNEPS2BF16Z128rm addr:$src)>;
  def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), (v8i16 VR128X:$src0),
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), v8i16x_info.ImmAllZerosV,
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rmkz VK4WM:$mask, addr:$src)>;

  def : Pat<(v8i16 (X86cvtneps2bf16 (v4f32
                                     (X86VBroadcastld32 addr:$src)))),
            (VCVTNEPS2BF16Z128rmb addr:$src)>;
  def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
                              (v8i16 VR128X:$src0), VK4WM:$mask),
            (VCVTNEPS2BF16Z128rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
                              v8i16x_info.ImmAllZerosV, VK4WM:$mask),
            (VCVTNEPS2BF16Z128rmbkz VK4WM:$mask, addr:$src)>;
}
// BF16 dot-product accumulate (VDPBF16PS): the accumulator uses the f32 VT
// info (_) while both multiplicand operands use the i32 VT info (src_v).
let Constraints = "$src1 = $dst" in {
multiclass avx512_dpbf16ps_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86FoldableSchedWrite sched,
                              X86VectorVTInfo _, X86VectorVTInfo src_v> {
  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins src_v.RC:$src2, src_v.RC:$src3),
                   OpcodeStr, "$src3, $src2", "$src2, $src3",
                   (_.VT (OpNode _.RC:$src1, src_v.RC:$src2, src_v.RC:$src3))>,
                   EVEX_4V, Sched<[sched]>;

  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins src_v.RC:$src2, src_v.MemOp:$src3),
                   OpcodeStr, "$src3, $src2", "$src2, $src3",
                   (_.VT (OpNode _.RC:$src1, src_v.RC:$src2,
                          (src_v.LdFrag addr:$src3)))>, EVEX_4V,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins src_v.RC:$src2, src_v.ScalarMemOp:$src3),
                   OpcodeStr,
                   !strconcat("${src3}", _.BroadcastStr,", $src2"),
                   !strconcat("$src2, ${src3}", _.BroadcastStr),
                   (_.VT (OpNode _.RC:$src1, src_v.RC:$src2,
                          (src_v.VT (src_v.BroadcastLdFrag addr:$src3))))>,
                   EVEX_B, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;

}
} // Constraints = "$src1 = $dst"

multiclass avx512_dpbf16ps_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _,
                                 AVX512VLVectorVTInfo src_v, Predicate prd> {
  let Predicates = [prd] in {
    defm Z    : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512,
                                   src_v.info512>, EVEX_V512;
  }
  let Predicates = [HasVLX, prd] in {
    defm Z256 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256,
                                   src_v.info256>, EVEX_V256;
    defm Z128 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128,
                                   src_v.info128>, EVEX_V128;
  }
}

let ExeDomain = SSEPackedSingle in
defm VDPBF16PS : avx512_dpbf16ps_sizes<0x52, "vdpbf16ps", X86dpbf16ps, SchedWriteFMA,
                                       avx512vl_f32_info, avx512vl_i32_info,
                                       HasBF16>, T8XS, EVEX_CD8<32, CD8VF>;