1//===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8// 9// This file describes the X86 AVX512 instruction set, defining the 10// instructions, and properties of the instructions which are needed for code 11// generation, machine code emission, and analysis. 12// 13//===----------------------------------------------------------------------===// 14 15// Group template arguments that can be derived from the vector type (EltNum x 16// EltVT). These are things like the register class for the writemask, etc. 17// The idea is to pass one of these as the template argument rather than the 18// individual arguments. 19// The template is also used for scalar types, in this case numelts is 1. 20class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc, 21 string suffix = ""> { 22 RegisterClass RC = rc; 23 ValueType EltVT = eltvt; 24 int NumElts = numelts; 25 26 // Corresponding mask register class. 27 RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts); 28 29 // Corresponding mask register pair class. 30 RegisterOperand KRPC = !if (!gt(NumElts, 16), ?, 31 !cast<RegisterOperand>("VK" # NumElts # "Pair")); 32 33 // Corresponding write-mask register class. 34 RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM"); 35 36 // The mask VT. 37 ValueType KVT = !cast<ValueType>("v" # NumElts # "i1"); 38 39 // Suffix used in the instruction mnemonic. 40 string Suffix = suffix; 41 42 // VTName is a string name for vector VT. For vector types it will be 43 // v # NumElts # EltVT, so for vector of 8 elements of i32 it will be v8i32 44 // It is a little bit complex for scalar types, where NumElts = 1. 45 // In this case we build v4f32 or v2f64 46 string VTName = "v" # !if (!eq (NumElts, 1), 47 !if (!eq (EltVT.Size, 32), 4, 48 !if (!eq (EltVT.Size, 64), 2, NumElts)), NumElts) # EltVT; 49 50 // The vector VT. 51 ValueType VT = !cast<ValueType>(VTName); 52 53 string EltTypeName = !cast<string>(EltVT); 54 // Size of the element type in bits, e.g. 32 for v16i32. 55 string EltSizeName = !subst("i", "", !subst("f", "", EltTypeName)); 56 int EltSize = EltVT.Size; 57 58 // "i" for integer types and "f" for floating-point types 59 string TypeVariantName = !subst(EltSizeName, "", EltTypeName); 60 61 // Size of RC in bits, e.g. 512 for VR512. 62 int Size = VT.Size; 63 64 // The corresponding memory operand, e.g. i512mem for VR512. 65 X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem"); 66 X86MemOperand ScalarMemOp = !cast<X86MemOperand>(EltVT # "mem"); 67 // FP scalar memory operand for intrinsics - ssmem/sdmem. 68 Operand IntScalarMemOp = !if (!eq (EltTypeName, "f32"), !cast<Operand>("ssmem"), 69 !if (!eq (EltTypeName, "f64"), !cast<Operand>("sdmem"), ?)); 70 71 // Load patterns 72 PatFrag LdFrag = !cast<PatFrag>("load" # VTName); 73 74 PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" # VTName); 75 76 PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT); 77 PatFrag BroadcastLdFrag = !cast<PatFrag>("X86VBroadcastld" # EltSizeName); 78 79 PatFrags ScalarIntMemFrags = !if (!eq (EltTypeName, "f32"), 80 !cast<PatFrags>("sse_load_f32"), 81 !if (!eq (EltTypeName, "f64"), 82 !cast<PatFrags>("sse_load_f64"), 83 ?)); 84 85 // The string to specify embedded broadcast in assembly. 86 string BroadcastStr = "{1to" # NumElts # "}"; 87 88 // 8-bit compressed displacement tuple/subvector format. This is only 89 // defined for NumElts <= 8. 90 CD8VForm CD8TupleForm = !if (!eq (!srl(NumElts, 4), 0), 91 !cast<CD8VForm>("CD8VT" # NumElts), ?); 92 93 SubRegIndex SubRegIdx = !if (!eq (Size, 128), sub_xmm, 94 !if (!eq (Size, 256), sub_ymm, ?)); 95 96 Domain ExeDomain = !if (!eq (EltTypeName, "f32"), SSEPackedSingle, 97 !if (!eq (EltTypeName, "f64"), SSEPackedDouble, 98 SSEPackedInt)); 99 100 RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X, FR64X); 101 102 dag ImmAllZerosV = (VT immAllZerosV); 103 104 string ZSuffix = !if (!eq (Size, 128), "Z128", 105 !if (!eq (Size, 256), "Z256", "Z")); 106} 107 108def v64i8_info : X86VectorVTInfo<64, i8, VR512, "b">; 109def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">; 110def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">; 111def v8i64_info : X86VectorVTInfo<8, i64, VR512, "q">; 112def v16f32_info : X86VectorVTInfo<16, f32, VR512, "ps">; 113def v8f64_info : X86VectorVTInfo<8, f64, VR512, "pd">; 114 115// "x" in v32i8x_info means RC = VR256X 116def v32i8x_info : X86VectorVTInfo<32, i8, VR256X, "b">; 117def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">; 118def v8i32x_info : X86VectorVTInfo<8, i32, VR256X, "d">; 119def v4i64x_info : X86VectorVTInfo<4, i64, VR256X, "q">; 120def v8f32x_info : X86VectorVTInfo<8, f32, VR256X, "ps">; 121def v4f64x_info : X86VectorVTInfo<4, f64, VR256X, "pd">; 122 123def v16i8x_info : X86VectorVTInfo<16, i8, VR128X, "b">; 124def v8i16x_info : X86VectorVTInfo<8, i16, VR128X, "w">; 125def v4i32x_info : X86VectorVTInfo<4, i32, VR128X, "d">; 126def v2i64x_info : X86VectorVTInfo<2, i64, VR128X, "q">; 127def v4f32x_info : X86VectorVTInfo<4, f32, VR128X, "ps">; 128def v2f64x_info : X86VectorVTInfo<2, f64, VR128X, "pd">; 129 130// We map scalar types to the smallest (128-bit) vector type 131// with the appropriate element type. This allows to use the same masking logic. 132def i32x_info : X86VectorVTInfo<1, i32, GR32, "si">; 133def i64x_info : X86VectorVTInfo<1, i64, GR64, "sq">; 134def f32x_info : X86VectorVTInfo<1, f32, VR128X, "ss">; 135def f64x_info : X86VectorVTInfo<1, f64, VR128X, "sd">; 136 137class AVX512VLVectorVTInfo<X86VectorVTInfo i512, X86VectorVTInfo i256, 138 X86VectorVTInfo i128> { 139 X86VectorVTInfo info512 = i512; 140 X86VectorVTInfo info256 = i256; 141 X86VectorVTInfo info128 = i128; 142} 143 144def avx512vl_i8_info : AVX512VLVectorVTInfo<v64i8_info, v32i8x_info, 145 v16i8x_info>; 146def avx512vl_i16_info : AVX512VLVectorVTInfo<v32i16_info, v16i16x_info, 147 v8i16x_info>; 148def avx512vl_i32_info : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info, 149 v4i32x_info>; 150def avx512vl_i64_info : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info, 151 v2i64x_info>; 152def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info, 153 v4f32x_info>; 154def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info, 155 v2f64x_info>; 156 157class X86KVectorVTInfo<RegisterClass _krc, RegisterClass _krcwm, 158 ValueType _vt> { 159 RegisterClass KRC = _krc; 160 RegisterClass KRCWM = _krcwm; 161 ValueType KVT = _vt; 162} 163 164def v1i1_info : X86KVectorVTInfo<VK1, VK1WM, v1i1>; 165def v2i1_info : X86KVectorVTInfo<VK2, VK2WM, v2i1>; 166def v4i1_info : X86KVectorVTInfo<VK4, VK4WM, v4i1>; 167def v8i1_info : X86KVectorVTInfo<VK8, VK8WM, v8i1>; 168def v16i1_info : X86KVectorVTInfo<VK16, VK16WM, v16i1>; 169def v32i1_info : X86KVectorVTInfo<VK32, VK32WM, v32i1>; 170def v64i1_info : X86KVectorVTInfo<VK64, VK64WM, v64i1>; 171 172// Used for matching masked operations. Ensures the operation part only has a 173// single use. 174def vselect_mask : PatFrag<(ops node:$mask, node:$src1, node:$src2), 175 (vselect node:$mask, node:$src1, node:$src2), [{ 176 return isProfitableToFormMaskedOp(N); 177}]>; 178 179def X86selects_mask : PatFrag<(ops node:$mask, node:$src1, node:$src2), 180 (X86selects node:$mask, node:$src1, node:$src2), [{ 181 return isProfitableToFormMaskedOp(N); 182}]>; 183 184// This multiclass generates the masking variants from the non-masking 185// variant. It only provides the assembly pieces for the masking variants. 186// It assumes custom ISel patterns for masking which can be provided as 187// template arguments. 188multiclass AVX512_maskable_custom<bits<8> O, Format F, 189 dag Outs, 190 dag Ins, dag MaskingIns, dag ZeroMaskingIns, 191 string OpcodeStr, 192 string AttSrcAsm, string IntelSrcAsm, 193 list<dag> Pattern, 194 list<dag> MaskingPattern, 195 list<dag> ZeroMaskingPattern, 196 string MaskingConstraint = "", 197 bit IsCommutable = 0, 198 bit IsKCommutable = 0, 199 bit IsKZCommutable = IsCommutable> { 200 let isCommutable = IsCommutable in 201 def NAME: AVX512<O, F, Outs, Ins, 202 OpcodeStr#"\t{"#AttSrcAsm#", $dst|"# 203 "$dst, "#IntelSrcAsm#"}", 204 Pattern>; 205 206 // Prefer over VMOV*rrk Pat<> 207 let isCommutable = IsKCommutable in 208 def NAME#k: AVX512<O, F, Outs, MaskingIns, 209 OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"# 210 "$dst {${mask}}, "#IntelSrcAsm#"}", 211 MaskingPattern>, 212 EVEX_K { 213 // In case of the 3src subclass this is overridden with a let. 214 string Constraints = MaskingConstraint; 215 } 216 217 // Zero mask does not add any restrictions to commute operands transformation. 218 // So, it is Ok to use IsCommutable instead of IsKCommutable. 219 let isCommutable = IsKZCommutable in // Prefer over VMOV*rrkz Pat<> 220 def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns, 221 OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"# 222 "$dst {${mask}} {z}, "#IntelSrcAsm#"}", 223 ZeroMaskingPattern>, 224 EVEX_KZ; 225} 226 227 228// Common base class of AVX512_maskable and AVX512_maskable_3src. 229multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _, 230 dag Outs, 231 dag Ins, dag MaskingIns, dag ZeroMaskingIns, 232 string OpcodeStr, 233 string AttSrcAsm, string IntelSrcAsm, 234 dag RHS, dag MaskingRHS, 235 SDPatternOperator Select = vselect_mask, 236 string MaskingConstraint = "", 237 bit IsCommutable = 0, 238 bit IsKCommutable = 0, 239 bit IsKZCommutable = IsCommutable> : 240 AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr, 241 AttSrcAsm, IntelSrcAsm, 242 [(set _.RC:$dst, RHS)], 243 [(set _.RC:$dst, MaskingRHS)], 244 [(set _.RC:$dst, 245 (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))], 246 MaskingConstraint, IsCommutable, 247 IsKCommutable, IsKZCommutable>; 248 249// This multiclass generates the unconditional/non-masking, the masking and 250// the zero-masking variant of the vector instruction. In the masking case, the 251// preserved vector elements come from a new dummy input operand tied to $dst. 252// This version uses a separate dag for non-masking and masking. 253multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _, 254 dag Outs, dag Ins, string OpcodeStr, 255 string AttSrcAsm, string IntelSrcAsm, 256 dag RHS, dag MaskRHS, 257 bit IsCommutable = 0, bit IsKCommutable = 0, 258 bit IsKZCommutable = IsCommutable> : 259 AVX512_maskable_custom<O, F, Outs, Ins, 260 !con((ins _.RC:$src0, _.KRCWM:$mask), Ins), 261 !con((ins _.KRCWM:$mask), Ins), 262 OpcodeStr, AttSrcAsm, IntelSrcAsm, 263 [(set _.RC:$dst, RHS)], 264 [(set _.RC:$dst, 265 (vselect_mask _.KRCWM:$mask, MaskRHS, _.RC:$src0))], 266 [(set _.RC:$dst, 267 (vselect_mask _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))], 268 "$src0 = $dst", IsCommutable, IsKCommutable, 269 IsKZCommutable>; 270 271// This multiclass generates the unconditional/non-masking, the masking and 272// the zero-masking variant of the vector instruction. In the masking case, the 273// preserved vector elements come from a new dummy input operand tied to $dst. 274multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _, 275 dag Outs, dag Ins, string OpcodeStr, 276 string AttSrcAsm, string IntelSrcAsm, 277 dag RHS, 278 bit IsCommutable = 0, bit IsKCommutable = 0, 279 bit IsKZCommutable = IsCommutable, 280 SDPatternOperator Select = vselect_mask> : 281 AVX512_maskable_common<O, F, _, Outs, Ins, 282 !con((ins _.RC:$src0, _.KRCWM:$mask), Ins), 283 !con((ins _.KRCWM:$mask), Ins), 284 OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS, 285 (Select _.KRCWM:$mask, RHS, _.RC:$src0), 286 Select, "$src0 = $dst", IsCommutable, IsKCommutable, 287 IsKZCommutable>; 288 289// This multiclass generates the unconditional/non-masking, the masking and 290// the zero-masking variant of the scalar instruction. 291multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _, 292 dag Outs, dag Ins, string OpcodeStr, 293 string AttSrcAsm, string IntelSrcAsm, 294 dag RHS> : 295 AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm, 296 RHS, 0, 0, 0, X86selects_mask>; 297 298// Similar to AVX512_maskable but in this case one of the source operands 299// ($src1) is already tied to $dst so we just use that for the preserved 300// vector elements. NOTE that the NonTiedIns (the ins dag) should exclude 301// $src1. 302multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _, 303 dag Outs, dag NonTiedIns, string OpcodeStr, 304 string AttSrcAsm, string IntelSrcAsm, 305 dag RHS, 306 bit IsCommutable = 0, 307 bit IsKCommutable = 0, 308 SDPatternOperator Select = vselect_mask, 309 bit MaskOnly = 0> : 310 AVX512_maskable_common<O, F, _, Outs, 311 !con((ins _.RC:$src1), NonTiedIns), 312 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns), 313 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns), 314 OpcodeStr, AttSrcAsm, IntelSrcAsm, 315 !if(MaskOnly, (null_frag), RHS), 316 (Select _.KRCWM:$mask, RHS, _.RC:$src1), 317 Select, "", IsCommutable, IsKCommutable>; 318 319// Similar to AVX512_maskable_3src but in this case the input VT for the tied 320// operand differs from the output VT. This requires a bitconvert on 321// the preserved vector going into the vselect. 322// NOTE: The unmasked pattern is disabled. 323multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT, 324 X86VectorVTInfo InVT, 325 dag Outs, dag NonTiedIns, string OpcodeStr, 326 string AttSrcAsm, string IntelSrcAsm, 327 dag RHS, bit IsCommutable = 0> : 328 AVX512_maskable_common<O, F, OutVT, Outs, 329 !con((ins InVT.RC:$src1), NonTiedIns), 330 !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns), 331 !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns), 332 OpcodeStr, AttSrcAsm, IntelSrcAsm, (null_frag), 333 (vselect_mask InVT.KRCWM:$mask, RHS, 334 (bitconvert InVT.RC:$src1)), 335 vselect_mask, "", IsCommutable>; 336 337multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _, 338 dag Outs, dag NonTiedIns, string OpcodeStr, 339 string AttSrcAsm, string IntelSrcAsm, 340 dag RHS, 341 bit IsCommutable = 0, 342 bit IsKCommutable = 0, 343 bit MaskOnly = 0> : 344 AVX512_maskable_3src<O, F, _, Outs, NonTiedIns, OpcodeStr, AttSrcAsm, 345 IntelSrcAsm, RHS, IsCommutable, IsKCommutable, 346 X86selects_mask, MaskOnly>; 347 348multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _, 349 dag Outs, dag Ins, 350 string OpcodeStr, 351 string AttSrcAsm, string IntelSrcAsm, 352 list<dag> Pattern> : 353 AVX512_maskable_custom<O, F, Outs, Ins, 354 !con((ins _.RC:$src0, _.KRCWM:$mask), Ins), 355 !con((ins _.KRCWM:$mask), Ins), 356 OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [], 357 "$src0 = $dst">; 358 359multiclass AVX512_maskable_3src_in_asm<bits<8> O, Format F, X86VectorVTInfo _, 360 dag Outs, dag NonTiedIns, 361 string OpcodeStr, 362 string AttSrcAsm, string IntelSrcAsm, 363 list<dag> Pattern> : 364 AVX512_maskable_custom<O, F, Outs, 365 !con((ins _.RC:$src1), NonTiedIns), 366 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns), 367 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns), 368 OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [], 369 "">; 370 371// Instruction with mask that puts result in mask register, 372// like "compare" and "vptest" 373multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F, 374 dag Outs, 375 dag Ins, dag MaskingIns, 376 string OpcodeStr, 377 string AttSrcAsm, string IntelSrcAsm, 378 list<dag> Pattern, 379 list<dag> MaskingPattern, 380 bit IsCommutable = 0> { 381 let isCommutable = IsCommutable in { 382 def NAME: AVX512<O, F, Outs, Ins, 383 OpcodeStr#"\t{"#AttSrcAsm#", $dst|"# 384 "$dst, "#IntelSrcAsm#"}", 385 Pattern>; 386 387 def NAME#k: AVX512<O, F, Outs, MaskingIns, 388 OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"# 389 "$dst {${mask}}, "#IntelSrcAsm#"}", 390 MaskingPattern>, EVEX_K; 391 } 392} 393 394multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _, 395 dag Outs, 396 dag Ins, dag MaskingIns, 397 string OpcodeStr, 398 string AttSrcAsm, string IntelSrcAsm, 399 dag RHS, dag MaskingRHS, 400 bit IsCommutable = 0> : 401 AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr, 402 AttSrcAsm, IntelSrcAsm, 403 [(set _.KRC:$dst, RHS)], 404 [(set _.KRC:$dst, MaskingRHS)], IsCommutable>; 405 406multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _, 407 dag Outs, dag Ins, string OpcodeStr, 408 string AttSrcAsm, string IntelSrcAsm, 409 dag RHS, dag RHS_su, bit IsCommutable = 0> : 410 AVX512_maskable_common_cmp<O, F, _, Outs, Ins, 411 !con((ins _.KRCWM:$mask), Ins), 412 OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS, 413 (and _.KRCWM:$mask, RHS_su), IsCommutable>; 414 415// Used by conversion instructions. 416multiclass AVX512_maskable_cvt<bits<8> O, Format F, X86VectorVTInfo _, 417 dag Outs, 418 dag Ins, dag MaskingIns, dag ZeroMaskingIns, 419 string OpcodeStr, 420 string AttSrcAsm, string IntelSrcAsm, 421 dag RHS, dag MaskingRHS, dag ZeroMaskingRHS> : 422 AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr, 423 AttSrcAsm, IntelSrcAsm, 424 [(set _.RC:$dst, RHS)], 425 [(set _.RC:$dst, MaskingRHS)], 426 [(set _.RC:$dst, ZeroMaskingRHS)], 427 "$src0 = $dst">; 428 429multiclass AVX512_maskable_fma<bits<8> O, Format F, X86VectorVTInfo _, 430 dag Outs, dag NonTiedIns, string OpcodeStr, 431 string AttSrcAsm, string IntelSrcAsm, 432 dag RHS, dag MaskingRHS, bit IsCommutable, 433 bit IsKCommutable> : 434 AVX512_maskable_custom<O, F, Outs, 435 !con((ins _.RC:$src1), NonTiedIns), 436 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns), 437 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns), 438 OpcodeStr, AttSrcAsm, IntelSrcAsm, 439 [(set _.RC:$dst, RHS)], 440 [(set _.RC:$dst, 441 (vselect_mask _.KRCWM:$mask, MaskingRHS, _.RC:$src1))], 442 [(set _.RC:$dst, 443 (vselect_mask _.KRCWM:$mask, MaskingRHS, _.ImmAllZerosV))], 444 "", IsCommutable, IsKCommutable>; 445 446// Alias instruction that maps zero vector to pxor / xorp* for AVX-512. 447// This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then 448// swizzled by ExecutionDomainFix to pxor. 449// We set canFoldAsLoad because this can be converted to a constant-pool 450// load of an all-zeros value if folding it would be beneficial. 451let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, 452 isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in { 453def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "", 454 [(set VR512:$dst, (v16i32 immAllZerosV))]>; 455def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "", 456 [(set VR512:$dst, (v16i32 immAllOnesV))]>; 457} 458 459let Predicates = [HasAVX512] in { 460def : Pat<(v64i8 immAllZerosV), (AVX512_512_SET0)>; 461def : Pat<(v32i16 immAllZerosV), (AVX512_512_SET0)>; 462def : Pat<(v8i64 immAllZerosV), (AVX512_512_SET0)>; 463def : Pat<(v16f32 immAllZerosV), (AVX512_512_SET0)>; 464def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>; 465} 466 467// Alias instructions that allow VPTERNLOG to be used with a mask to create 468// a mix of all ones and all zeros elements. This is done this way to force 469// the same register to be used as input for all three sources. 470let isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteVecALU] in { 471def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst), 472 (ins VK16WM:$mask), "", 473 [(set VR512:$dst, (vselect (v16i1 VK16WM:$mask), 474 (v16i32 immAllOnesV), 475 (v16i32 immAllZerosV)))]>; 476def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst), 477 (ins VK8WM:$mask), "", 478 [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask), 479 (v8i64 immAllOnesV), 480 (v8i64 immAllZerosV)))]>; 481} 482 483let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, 484 isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in { 485def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "", 486 [(set VR128X:$dst, (v4i32 immAllZerosV))]>; 487def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "", 488 [(set VR256X:$dst, (v8i32 immAllZerosV))]>; 489} 490 491let Predicates = [HasAVX512] in { 492def : Pat<(v8i16 immAllZerosV), (AVX512_128_SET0)>; 493def : Pat<(v16i8 immAllZerosV), (AVX512_128_SET0)>; 494def : Pat<(v2i64 immAllZerosV), (AVX512_128_SET0)>; 495def : Pat<(v4f32 immAllZerosV), (AVX512_128_SET0)>; 496def : Pat<(v2f64 immAllZerosV), (AVX512_128_SET0)>; 497def : Pat<(v32i8 immAllZerosV), (AVX512_256_SET0)>; 498def : Pat<(v16i16 immAllZerosV), (AVX512_256_SET0)>; 499def : Pat<(v4i64 immAllZerosV), (AVX512_256_SET0)>; 500def : Pat<(v8f32 immAllZerosV), (AVX512_256_SET0)>; 501def : Pat<(v4f64 immAllZerosV), (AVX512_256_SET0)>; 502} 503 504// Alias instructions that map fld0 to xorps for sse or vxorps for avx. 505// This is expanded by ExpandPostRAPseudos. 506let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, 507 isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in { 508 def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "", 509 [(set FR32X:$dst, fp32imm0)]>; 510 def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "", 511 [(set FR64X:$dst, fp64imm0)]>; 512 def AVX512_FsFLD0F128 : I<0, Pseudo, (outs VR128X:$dst), (ins), "", 513 [(set VR128X:$dst, fp128imm0)]>; 514} 515 516//===----------------------------------------------------------------------===// 517// AVX-512 - VECTOR INSERT 518// 519 520// Supports two different pattern operators for mask and unmasked ops. Allows 521// null_frag to be passed for one. 522multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From, 523 X86VectorVTInfo To, 524 SDPatternOperator vinsert_insert, 525 SDPatternOperator vinsert_for_mask, 526 X86FoldableSchedWrite sched> { 527 let hasSideEffects = 0, ExeDomain = To.ExeDomain in { 528 defm rr : AVX512_maskable_split<Opcode, MRMSrcReg, To, (outs To.RC:$dst), 529 (ins To.RC:$src1, From.RC:$src2, u8imm:$src3), 530 "vinsert" # From.EltTypeName # "x" # From.NumElts, 531 "$src3, $src2, $src1", "$src1, $src2, $src3", 532 (vinsert_insert:$src3 (To.VT To.RC:$src1), 533 (From.VT From.RC:$src2), 534 (iPTR imm)), 535 (vinsert_for_mask:$src3 (To.VT To.RC:$src1), 536 (From.VT From.RC:$src2), 537 (iPTR imm))>, 538 AVX512AIi8Base, EVEX_4V, Sched<[sched]>; 539 let mayLoad = 1 in 540 defm rm : AVX512_maskable_split<Opcode, MRMSrcMem, To, (outs To.RC:$dst), 541 (ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3), 542 "vinsert" # From.EltTypeName # "x" # From.NumElts, 543 "$src3, $src2, $src1", "$src1, $src2, $src3", 544 (vinsert_insert:$src3 (To.VT To.RC:$src1), 545 (From.VT (From.LdFrag addr:$src2)), 546 (iPTR imm)), 547 (vinsert_for_mask:$src3 (To.VT To.RC:$src1), 548 (From.VT (From.LdFrag addr:$src2)), 549 (iPTR imm))>, AVX512AIi8Base, EVEX_4V, 550 EVEX_CD8<From.EltSize, From.CD8TupleForm>, 551 Sched<[sched.Folded, sched.ReadAfterFold]>; 552 } 553} 554 555// Passes the same pattern operator for masked and unmasked ops. 556multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From, 557 X86VectorVTInfo To, 558 SDPatternOperator vinsert_insert, 559 X86FoldableSchedWrite sched> : 560 vinsert_for_size_split<Opcode, From, To, vinsert_insert, vinsert_insert, sched>; 561 562multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From, 563 X86VectorVTInfo To, PatFrag vinsert_insert, 564 SDNodeXForm INSERT_get_vinsert_imm , list<Predicate> p> { 565 let Predicates = p in { 566 def : Pat<(vinsert_insert:$ins 567 (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)), 568 (To.VT (!cast<Instruction>(InstrStr#"rr") 569 To.RC:$src1, From.RC:$src2, 570 (INSERT_get_vinsert_imm To.RC:$ins)))>; 571 572 def : Pat<(vinsert_insert:$ins 573 (To.VT To.RC:$src1), 574 (From.VT (From.LdFrag addr:$src2)), 575 (iPTR imm)), 576 (To.VT (!cast<Instruction>(InstrStr#"rm") 577 To.RC:$src1, addr:$src2, 578 (INSERT_get_vinsert_imm To.RC:$ins)))>; 579 } 580} 581 582multiclass vinsert_for_type<ValueType EltVT32, int Opcode128, 583 ValueType EltVT64, int Opcode256, 584 X86FoldableSchedWrite sched> { 585 586 let Predicates = [HasVLX] in 587 defm NAME # "32x4Z256" : vinsert_for_size<Opcode128, 588 X86VectorVTInfo< 4, EltVT32, VR128X>, 589 X86VectorVTInfo< 8, EltVT32, VR256X>, 590 vinsert128_insert, sched>, EVEX_V256; 591 592 defm NAME # "32x4Z" : vinsert_for_size<Opcode128, 593 X86VectorVTInfo< 4, EltVT32, VR128X>, 594 X86VectorVTInfo<16, EltVT32, VR512>, 595 vinsert128_insert, sched>, EVEX_V512; 596 597 defm NAME # "64x4Z" : vinsert_for_size<Opcode256, 598 X86VectorVTInfo< 4, EltVT64, VR256X>, 599 X86VectorVTInfo< 8, EltVT64, VR512>, 600 vinsert256_insert, sched>, VEX_W, EVEX_V512; 601 602 // Even with DQI we'd like to only use these instructions for masking. 603 let Predicates = [HasVLX, HasDQI] in 604 defm NAME # "64x2Z256" : vinsert_for_size_split<Opcode128, 605 X86VectorVTInfo< 2, EltVT64, VR128X>, 606 X86VectorVTInfo< 4, EltVT64, VR256X>, 607 null_frag, vinsert128_insert, sched>, 608 VEX_W1X, EVEX_V256; 609 610 // Even with DQI we'd like to only use these instructions for masking. 611 let Predicates = [HasDQI] in { 612 defm NAME # "64x2Z" : vinsert_for_size_split<Opcode128, 613 X86VectorVTInfo< 2, EltVT64, VR128X>, 614 X86VectorVTInfo< 8, EltVT64, VR512>, 615 null_frag, vinsert128_insert, sched>, 616 VEX_W, EVEX_V512; 617 618 defm NAME # "32x8Z" : vinsert_for_size_split<Opcode256, 619 X86VectorVTInfo< 8, EltVT32, VR256X>, 620 X86VectorVTInfo<16, EltVT32, VR512>, 621 null_frag, vinsert256_insert, sched>, 622 EVEX_V512; 623 } 624} 625 626// FIXME: Is there a better scheduler class for VINSERTF/VINSERTI? 627defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a, WriteFShuffle256>; 628defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a, WriteShuffle256>; 629 630// Codegen pattern with the alternative types, 631// Even with AVX512DQ we'll still use these for unmasked operations. 632defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info, 633 vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>; 634defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info, 635 vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>; 636 637defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v2f64x_info, v8f64_info, 638 vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>; 639defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v2i64x_info, v8i64_info, 640 vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>; 641 642defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v8f32x_info, v16f32_info, 643 vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>; 644defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v8i32x_info, v16i32_info, 645 vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>; 646 647// Codegen pattern with the alternative types insert VEC128 into VEC256 648defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info, 649 vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>; 650defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info, 651 vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>; 652// Codegen pattern with the alternative types insert VEC128 into VEC512 653defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info, 654 vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>; 655defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info, 656 vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>; 657// Codegen pattern with the alternative types insert VEC256 into VEC512 658defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info, 659 vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>; 660defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info, 661 vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>; 662 663 664multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From, 665 X86VectorVTInfo To, X86VectorVTInfo Cast, 666 PatFrag vinsert_insert, 667 SDNodeXForm INSERT_get_vinsert_imm, 668 list<Predicate> p> { 669let Predicates = p in { 670 def : Pat<(Cast.VT 671 (vselect_mask Cast.KRCWM:$mask, 672 (bitconvert 673 (vinsert_insert:$ins (To.VT To.RC:$src1), 674 (From.VT From.RC:$src2), 675 (iPTR imm))), 676 Cast.RC:$src0)), 677 (!cast<Instruction>(InstrStr#"rrk") 678 Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2, 679 (INSERT_get_vinsert_imm To.RC:$ins))>; 680 def : Pat<(Cast.VT 681 (vselect_mask Cast.KRCWM:$mask, 682 (bitconvert 683 (vinsert_insert:$ins (To.VT To.RC:$src1), 684 (From.VT 685 (bitconvert 686 (From.LdFrag addr:$src2))), 687 (iPTR imm))), 688 Cast.RC:$src0)), 689 (!cast<Instruction>(InstrStr#"rmk") 690 Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, addr:$src2, 691 (INSERT_get_vinsert_imm To.RC:$ins))>; 692 693 def : Pat<(Cast.VT 694 (vselect_mask Cast.KRCWM:$mask, 695 (bitconvert 696 (vinsert_insert:$ins (To.VT To.RC:$src1), 697 (From.VT From.RC:$src2), 698 (iPTR imm))), 699 Cast.ImmAllZerosV)), 700 (!cast<Instruction>(InstrStr#"rrkz") 701 Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2, 702 (INSERT_get_vinsert_imm To.RC:$ins))>; 703 def : Pat<(Cast.VT 704 (vselect_mask Cast.KRCWM:$mask, 705 (bitconvert 706 (vinsert_insert:$ins (To.VT To.RC:$src1), 707 (From.VT (From.LdFrag addr:$src2)), 708 (iPTR imm))), 709 Cast.ImmAllZerosV)), 710 (!cast<Instruction>(InstrStr#"rmkz") 711 Cast.KRCWM:$mask, To.RC:$src1, addr:$src2, 712 (INSERT_get_vinsert_imm To.RC:$ins))>; 713} 714} 715 716defm : vinsert_for_mask_cast<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info, 717 v8f32x_info, vinsert128_insert, 718 INSERT_get_vinsert128_imm, [HasVLX]>; 719defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4f32x_info, v8f32x_info, 720 v4f64x_info, vinsert128_insert, 721 INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>; 722 723defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info, 724 v8i32x_info, vinsert128_insert, 725 INSERT_get_vinsert128_imm, [HasVLX]>; 726defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info, 727 v8i32x_info, vinsert128_insert, 728 INSERT_get_vinsert128_imm, [HasVLX]>; 729defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info, 730 v8i32x_info, vinsert128_insert, 731 INSERT_get_vinsert128_imm, [HasVLX]>; 732defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4i32x_info, v8i32x_info, 733 v4i64x_info, vinsert128_insert, 734 INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>; 735defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v8i16x_info, v16i16x_info, 736 v4i64x_info, vinsert128_insert, 737 INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>; 738defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v16i8x_info, v32i8x_info, 739 v4i64x_info, vinsert128_insert, 740 INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>; 741 742defm : vinsert_for_mask_cast<"VINSERTF32x4Z", v2f64x_info, v8f64_info, 743 v16f32_info, vinsert128_insert, 744 INSERT_get_vinsert128_imm, [HasAVX512]>; 745defm : vinsert_for_mask_cast<"VINSERTF64x2Z", v4f32x_info, v16f32_info, 746 v8f64_info, vinsert128_insert, 747 INSERT_get_vinsert128_imm, [HasDQI]>; 748 749defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v2i64x_info, v8i64_info, 750 v16i32_info, vinsert128_insert, 751 INSERT_get_vinsert128_imm, [HasAVX512]>; 752defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v8i16x_info, v32i16_info, 753 v16i32_info, vinsert128_insert, 754 INSERT_get_vinsert128_imm, [HasAVX512]>; 755defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v16i8x_info, v64i8_info, 756 v16i32_info, vinsert128_insert, 757 INSERT_get_vinsert128_imm, [HasAVX512]>; 758defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v4i32x_info, v16i32_info, 759 v8i64_info, vinsert128_insert, 760 INSERT_get_vinsert128_imm, [HasDQI]>; 761defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v8i16x_info, v32i16_info, 762 v8i64_info, vinsert128_insert, 763 INSERT_get_vinsert128_imm, [HasDQI]>; 764defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v16i8x_info, v64i8_info, 765 v8i64_info, vinsert128_insert, 766 INSERT_get_vinsert128_imm, [HasDQI]>; 767 768defm : vinsert_for_mask_cast<"VINSERTF32x8Z", v4f64x_info, v8f64_info, 769 v16f32_info, vinsert256_insert, 770 INSERT_get_vinsert256_imm, [HasDQI]>; 771defm : vinsert_for_mask_cast<"VINSERTF64x4Z", v8f32x_info, v16f32_info, 772 v8f64_info, vinsert256_insert, 773 INSERT_get_vinsert256_imm, [HasAVX512]>; 774 775defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v4i64x_info, v8i64_info, 776 v16i32_info, vinsert256_insert, 777 INSERT_get_vinsert256_imm, [HasDQI]>; 778defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v16i16x_info, v32i16_info, 779 v16i32_info, vinsert256_insert, 780 INSERT_get_vinsert256_imm, [HasDQI]>; 781defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v32i8x_info, v64i8_info, 782 v16i32_info, vinsert256_insert, 783 INSERT_get_vinsert256_imm, [HasDQI]>; 784defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v8i32x_info, v16i32_info, 785 v8i64_info, vinsert256_insert, 786 INSERT_get_vinsert256_imm, [HasAVX512]>; 787defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v16i16x_info, v32i16_info, 788 v8i64_info, vinsert256_insert, 789 INSERT_get_vinsert256_imm, [HasAVX512]>; 790defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v32i8x_info, v64i8_info, 791 v8i64_info, vinsert256_insert, 792 INSERT_get_vinsert256_imm, [HasAVX512]>; 793 794// vinsertps - insert f32 to XMM 795let ExeDomain = SSEPackedSingle in { 796let isCommutable = 1 in 797def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst), 798 (ins VR128X:$src1, VR128X:$src2, u8imm:$src3), 799 "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 800 [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, timm:$src3))]>, 801 EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>; 802def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst), 803 (ins VR128X:$src1, f32mem:$src2, u8imm:$src3), 804 "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 805 [(set VR128X:$dst, (X86insertps VR128X:$src1, 806 (v4f32 (scalar_to_vector (loadf32 addr:$src2))), 807 timm:$src3))]>, 808 EVEX_4V, EVEX_CD8<32, CD8VT1>, 809 Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>; 810} 811 812//===----------------------------------------------------------------------===// 813// AVX-512 VECTOR EXTRACT 814//--- 815 816// Supports two different pattern operators for mask and unmasked ops. Allows 817// null_frag to be passed for one. 818multiclass vextract_for_size_split<int Opcode, 819 X86VectorVTInfo From, X86VectorVTInfo To, 820 SDPatternOperator vextract_extract, 821 SDPatternOperator vextract_for_mask, 822 SchedWrite SchedRR, SchedWrite SchedMR> { 823 824 let hasSideEffects = 0, ExeDomain = To.ExeDomain in { 825 defm rr : AVX512_maskable_split<Opcode, MRMDestReg, To, (outs To.RC:$dst), 826 (ins From.RC:$src1, u8imm:$idx), 827 "vextract" # To.EltTypeName # "x" # To.NumElts, 828 "$idx, $src1", "$src1, $idx", 829 (vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm)), 830 (vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm))>, 831 AVX512AIi8Base, EVEX, Sched<[SchedRR]>; 832 833 def mr : AVX512AIi8<Opcode, MRMDestMem, (outs), 834 (ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx), 835 "vextract" # To.EltTypeName # "x" # To.NumElts # 836 "\t{$idx, $src1, $dst|$dst, $src1, $idx}", 837 [(store (To.VT (vextract_extract:$idx 838 (From.VT From.RC:$src1), (iPTR imm))), 839 addr:$dst)]>, EVEX, 840 Sched<[SchedMR]>; 841 842 let mayStore = 1, hasSideEffects = 0 in 843 def mrk : AVX512AIi8<Opcode, MRMDestMem, (outs), 844 (ins To.MemOp:$dst, To.KRCWM:$mask, 845 From.RC:$src1, u8imm:$idx), 846 "vextract" # To.EltTypeName # "x" # To.NumElts # 847 "\t{$idx, $src1, $dst {${mask}}|" 848 "$dst {${mask}}, $src1, $idx}", []>, 849 EVEX_K, EVEX, Sched<[SchedMR]>, NotMemoryFoldable; 850 } 851} 852 853// Passes the same pattern operator for masked and unmasked ops. 854multiclass vextract_for_size<int Opcode, X86VectorVTInfo From, 855 X86VectorVTInfo To, 856 SDPatternOperator vextract_extract, 857 SchedWrite SchedRR, SchedWrite SchedMR> : 858 vextract_for_size_split<Opcode, From, To, vextract_extract, vextract_extract, SchedRR, SchedMR>; 859 860// Codegen pattern for the alternative types 861multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From, 862 X86VectorVTInfo To, PatFrag vextract_extract, 863 SDNodeXForm EXTRACT_get_vextract_imm, list<Predicate> p> { 864 let Predicates = p in { 865 def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)), 866 (To.VT (!cast<Instruction>(InstrStr#"rr") 867 From.RC:$src1, 868 (EXTRACT_get_vextract_imm To.RC:$ext)))>; 869 def : Pat<(store (To.VT (vextract_extract:$ext (From.VT From.RC:$src1), 870 (iPTR imm))), addr:$dst), 871 (!cast<Instruction>(InstrStr#"mr") addr:$dst, From.RC:$src1, 872 (EXTRACT_get_vextract_imm To.RC:$ext))>; 873 } 874} 875 876multiclass vextract_for_type<ValueType EltVT32, int Opcode128, 877 ValueType EltVT64, int Opcode256, 878 SchedWrite SchedRR, SchedWrite SchedMR> { 879 let Predicates = [HasAVX512] in { 880 defm NAME # "32x4Z" : vextract_for_size<Opcode128, 881 X86VectorVTInfo<16, EltVT32, VR512>, 882 X86VectorVTInfo< 4, EltVT32, VR128X>, 883 vextract128_extract, SchedRR, SchedMR>, 884 EVEX_V512, EVEX_CD8<32, CD8VT4>; 885 defm NAME # "64x4Z" : vextract_for_size<Opcode256, 886 X86VectorVTInfo< 8, EltVT64, VR512>, 887 X86VectorVTInfo< 4, EltVT64, VR256X>, 888 vextract256_extract, SchedRR, SchedMR>, 889 VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>; 890 } 891 let Predicates = [HasVLX] in 892 defm NAME # "32x4Z256" : vextract_for_size<Opcode128, 893 X86VectorVTInfo< 8, EltVT32, VR256X>, 894 X86VectorVTInfo< 4, EltVT32, VR128X>, 895 vextract128_extract, SchedRR, SchedMR>, 896 EVEX_V256, EVEX_CD8<32, CD8VT4>; 897 898 // Even with DQI we'd like to only use these instructions for masking. 899 let Predicates = [HasVLX, HasDQI] in 900 defm NAME # "64x2Z256" : vextract_for_size_split<Opcode128, 901 X86VectorVTInfo< 4, EltVT64, VR256X>, 902 X86VectorVTInfo< 2, EltVT64, VR128X>, 903 null_frag, vextract128_extract, SchedRR, SchedMR>, 904 VEX_W1X, EVEX_V256, EVEX_CD8<64, CD8VT2>; 905 906 // Even with DQI we'd like to only use these instructions for masking. 907 let Predicates = [HasDQI] in { 908 defm NAME # "64x2Z" : vextract_for_size_split<Opcode128, 909 X86VectorVTInfo< 8, EltVT64, VR512>, 910 X86VectorVTInfo< 2, EltVT64, VR128X>, 911 null_frag, vextract128_extract, SchedRR, SchedMR>, 912 VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>; 913 defm NAME # "32x8Z" : vextract_for_size_split<Opcode256, 914 X86VectorVTInfo<16, EltVT32, VR512>, 915 X86VectorVTInfo< 8, EltVT32, VR256X>, 916 null_frag, vextract256_extract, SchedRR, SchedMR>, 917 EVEX_V512, EVEX_CD8<32, CD8VT8>; 918 } 919} 920 921// TODO - replace WriteFStore/WriteVecStore with X86SchedWriteMoveLSWidths types. 922defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b, WriteFShuffle256, WriteFStore>; 923defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b, WriteShuffle256, WriteVecStore>; 924 925// extract_subvector codegen patterns with the alternative types. 926// Even with AVX512DQ we'll still use these for unmasked operations. 927defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info, 928 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>; 929defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info, 930 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>; 931 932defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info, 933 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>; 934defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info, 935 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>; 936 937defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info, 938 vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>; 939defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info, 940 vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>; 941 942// Codegen pattern with the alternative types extract VEC128 from VEC256 943defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info, 944 vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>; 945defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info, 946 vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>; 947 948// Codegen pattern with the alternative types extract VEC128 from VEC512 949defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info, 950 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>; 951defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info, 952 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>; 953// Codegen pattern with the alternative types extract VEC256 from VEC512 954defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info, 955 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>; 956defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info, 957 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>; 958 959 960// A 128-bit extract from bits [255:128] of a 512-bit vector should use a 961// smaller extract to enable EVEX->VEX. 962let Predicates = [NoVLX] in { 963def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))), 964 (v2i64 (VEXTRACTI128rr 965 (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)), 966 (iPTR 1)))>; 967def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))), 968 (v2f64 (VEXTRACTF128rr 969 (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)), 970 (iPTR 1)))>; 971def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))), 972 (v4i32 (VEXTRACTI128rr 973 (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)), 974 (iPTR 1)))>; 975def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))), 976 (v4f32 (VEXTRACTF128rr 977 (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)), 978 (iPTR 1)))>; 979def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))), 980 (v8i16 (VEXTRACTI128rr 981 (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)), 982 (iPTR 1)))>; 983def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))), 984 (v16i8 (VEXTRACTI128rr 985 (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)), 986 (iPTR 1)))>; 987} 988 989// A 128-bit extract from bits [255:128] of a 512-bit vector should use a 990// smaller extract to enable EVEX->VEX. 991let Predicates = [HasVLX] in { 992def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))), 993 (v2i64 (VEXTRACTI32x4Z256rr 994 (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)), 995 (iPTR 1)))>; 996def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))), 997 (v2f64 (VEXTRACTF32x4Z256rr 998 (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)), 999 (iPTR 1)))>; 1000def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))), 1001 (v4i32 (VEXTRACTI32x4Z256rr 1002 (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)), 1003 (iPTR 1)))>; 1004def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))), 1005 (v4f32 (VEXTRACTF32x4Z256rr 1006 (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)), 1007 (iPTR 1)))>; 1008def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))), 1009 (v8i16 (VEXTRACTI32x4Z256rr 1010 (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)), 1011 (iPTR 1)))>; 1012def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))), 1013 (v16i8 (VEXTRACTI32x4Z256rr 1014 (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)), 1015 (iPTR 1)))>; 1016} 1017 1018 1019// Additional patterns for handling a bitcast between the vselect and the 1020// extract_subvector. 1021multiclass vextract_for_mask_cast<string InstrStr, X86VectorVTInfo From, 1022 X86VectorVTInfo To, X86VectorVTInfo Cast, 1023 PatFrag vextract_extract, 1024 SDNodeXForm EXTRACT_get_vextract_imm, 1025 list<Predicate> p> { 1026let Predicates = p in { 1027 def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask, 1028 (bitconvert 1029 (To.VT (vextract_extract:$ext 1030 (From.VT From.RC:$src), (iPTR imm)))), 1031 To.RC:$src0)), 1032 (Cast.VT (!cast<Instruction>(InstrStr#"rrk") 1033 Cast.RC:$src0, Cast.KRCWM:$mask, From.RC:$src, 1034 (EXTRACT_get_vextract_imm To.RC:$ext)))>; 1035 1036 def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask, 1037 (bitconvert 1038 (To.VT (vextract_extract:$ext 1039 (From.VT From.RC:$src), (iPTR imm)))), 1040 Cast.ImmAllZerosV)), 1041 (Cast.VT (!cast<Instruction>(InstrStr#"rrkz") 1042 Cast.KRCWM:$mask, From.RC:$src, 1043 (EXTRACT_get_vextract_imm To.RC:$ext)))>; 1044} 1045} 1046 1047defm : vextract_for_mask_cast<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info, 1048 v4f32x_info, vextract128_extract, 1049 EXTRACT_get_vextract128_imm, [HasVLX]>; 1050defm : vextract_for_mask_cast<"VEXTRACTF64x2Z256", v8f32x_info, v4f32x_info, 1051 v2f64x_info, vextract128_extract, 1052 EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>; 1053 1054defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info, 1055 v4i32x_info, vextract128_extract, 1056 EXTRACT_get_vextract128_imm, [HasVLX]>; 1057defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info, 1058 v4i32x_info, vextract128_extract, 1059 EXTRACT_get_vextract128_imm, [HasVLX]>; 1060defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info, 1061 v4i32x_info, vextract128_extract, 1062 EXTRACT_get_vextract128_imm, [HasVLX]>; 1063defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v8i32x_info, v4i32x_info, 1064 v2i64x_info, vextract128_extract, 1065 EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>; 1066defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v16i16x_info, v8i16x_info, 1067 v2i64x_info, vextract128_extract, 1068 EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>; 1069defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v32i8x_info, v16i8x_info, 1070 v2i64x_info, vextract128_extract, 1071 EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>; 1072 1073defm : vextract_for_mask_cast<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info, 1074 v4f32x_info, vextract128_extract, 1075 EXTRACT_get_vextract128_imm, [HasAVX512]>; 1076defm : vextract_for_mask_cast<"VEXTRACTF64x2Z", v16f32_info, v4f32x_info, 1077 v2f64x_info, vextract128_extract, 1078 EXTRACT_get_vextract128_imm, [HasDQI]>; 1079 1080defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info, 1081 v4i32x_info, vextract128_extract, 1082 EXTRACT_get_vextract128_imm, [HasAVX512]>; 1083defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info, 1084 v4i32x_info, vextract128_extract, 1085 EXTRACT_get_vextract128_imm, [HasAVX512]>; 1086defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info, 1087 v4i32x_info, vextract128_extract, 1088 EXTRACT_get_vextract128_imm, [HasAVX512]>; 1089defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v16i32_info, v4i32x_info, 1090 v2i64x_info, vextract128_extract, 1091 EXTRACT_get_vextract128_imm, [HasDQI]>; 1092defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v32i16_info, v8i16x_info, 1093 v2i64x_info, vextract128_extract, 1094 EXTRACT_get_vextract128_imm, [HasDQI]>; 1095defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v64i8_info, v16i8x_info, 1096 v2i64x_info, vextract128_extract, 1097 EXTRACT_get_vextract128_imm, [HasDQI]>; 1098 1099defm : vextract_for_mask_cast<"VEXTRACTF32x8Z", v8f64_info, v4f64x_info, 1100 v8f32x_info, vextract256_extract, 1101 EXTRACT_get_vextract256_imm, [HasDQI]>; 1102defm : vextract_for_mask_cast<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info, 1103 v4f64x_info, vextract256_extract, 1104 EXTRACT_get_vextract256_imm, [HasAVX512]>; 1105 1106defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v8i64_info, v4i64x_info, 1107 v8i32x_info, vextract256_extract, 1108 EXTRACT_get_vextract256_imm, [HasDQI]>; 1109defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v32i16_info, v16i16x_info, 1110 v8i32x_info, vextract256_extract, 1111 EXTRACT_get_vextract256_imm, [HasDQI]>; 1112defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v64i8_info, v32i8x_info, 1113 v8i32x_info, vextract256_extract, 1114 EXTRACT_get_vextract256_imm, [HasDQI]>; 1115defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info, 1116 v4i64x_info, vextract256_extract, 1117 EXTRACT_get_vextract256_imm, [HasAVX512]>; 1118defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info, 1119 v4i64x_info, vextract256_extract, 1120 EXTRACT_get_vextract256_imm, [HasAVX512]>; 1121defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info, 1122 v4i64x_info, vextract256_extract, 1123 EXTRACT_get_vextract256_imm, [HasAVX512]>; 1124 1125// vextractps - extract 32 bits from XMM 1126def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32orGR64:$dst), 1127 (ins VR128X:$src1, u8imm:$src2), 1128 "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}", 1129 [(set GR32orGR64:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>, 1130 EVEX, VEX_WIG, Sched<[WriteVecExtract]>; 1131 1132def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs), 1133 (ins f32mem:$dst, VR128X:$src1, u8imm:$src2), 1134 "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}", 1135 [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2), 1136 addr:$dst)]>, 1137 EVEX, VEX_WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecExtractSt]>; 1138 1139//===---------------------------------------------------------------------===// 1140// AVX-512 BROADCAST 1141//--- 1142// broadcast with a scalar argument. 1143multiclass avx512_broadcast_scalar<bits<8> opc, string OpcodeStr, 1144 string Name, 1145 X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo> { 1146 def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)), 1147 (!cast<Instruction>(Name#DestInfo.ZSuffix#rr) 1148 (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>; 1149 def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask, 1150 (X86VBroadcast SrcInfo.FRC:$src), 1151 DestInfo.RC:$src0)), 1152 (!cast<Instruction>(Name#DestInfo.ZSuffix#rrk) 1153 DestInfo.RC:$src0, DestInfo.KRCWM:$mask, 1154 (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>; 1155 def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask, 1156 (X86VBroadcast SrcInfo.FRC:$src), 1157 DestInfo.ImmAllZerosV)), 1158 (!cast<Instruction>(Name#DestInfo.ZSuffix#rrkz) 1159 DestInfo.KRCWM:$mask, (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>; 1160} 1161 1162// Split version to allow mask and broadcast node to be different types. This 1163// helps support the 32x2 broadcasts. 1164multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr, 1165 string Name, 1166 SchedWrite SchedRR, SchedWrite SchedRM, 1167 X86VectorVTInfo MaskInfo, 1168 X86VectorVTInfo DestInfo, 1169 X86VectorVTInfo SrcInfo, 1170 bit IsConvertibleToThreeAddress, 1171 SDPatternOperator UnmaskedOp = X86VBroadcast, 1172 SDPatternOperator UnmaskedBcastOp = SrcInfo.BroadcastLdFrag> { 1173 let hasSideEffects = 0 in 1174 def rr : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst), (ins SrcInfo.RC:$src), 1175 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 1176 [(set MaskInfo.RC:$dst, 1177 (MaskInfo.VT 1178 (bitconvert 1179 (DestInfo.VT 1180 (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))))], 1181 DestInfo.ExeDomain>, T8PD, EVEX, Sched<[SchedRR]>; 1182 def rrkz : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst), 1183 (ins MaskInfo.KRCWM:$mask, SrcInfo.RC:$src), 1184 !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|", 1185 "${dst} {${mask}} {z}, $src}"), 1186 [(set MaskInfo.RC:$dst, 1187 (vselect_mask MaskInfo.KRCWM:$mask, 1188 (MaskInfo.VT 1189 (bitconvert 1190 (DestInfo.VT 1191 (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))), 1192 MaskInfo.ImmAllZerosV))], 1193 DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ, Sched<[SchedRR]>; 1194 let Constraints = "$src0 = $dst" in 1195 def rrk : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst), 1196 (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask, 1197 SrcInfo.RC:$src), 1198 !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|", 1199 "${dst} {${mask}}, $src}"), 1200 [(set MaskInfo.RC:$dst, 1201 (vselect_mask MaskInfo.KRCWM:$mask, 1202 (MaskInfo.VT 1203 (bitconvert 1204 (DestInfo.VT 1205 (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))), 1206 MaskInfo.RC:$src0))], 1207 DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K, Sched<[SchedRR]>; 1208 1209 let hasSideEffects = 0, mayLoad = 1 in 1210 def rm : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst), 1211 (ins SrcInfo.ScalarMemOp:$src), 1212 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 1213 [(set MaskInfo.RC:$dst, 1214 (MaskInfo.VT 1215 (bitconvert 1216 (DestInfo.VT 1217 (UnmaskedBcastOp addr:$src)))))], 1218 DestInfo.ExeDomain>, T8PD, EVEX, 1219 EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>; 1220 1221 def rmkz : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst), 1222 (ins MaskInfo.KRCWM:$mask, SrcInfo.ScalarMemOp:$src), 1223 !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|", 1224 "${dst} {${mask}} {z}, $src}"), 1225 [(set MaskInfo.RC:$dst, 1226 (vselect_mask MaskInfo.KRCWM:$mask, 1227 (MaskInfo.VT 1228 (bitconvert 1229 (DestInfo.VT 1230 (SrcInfo.BroadcastLdFrag addr:$src)))), 1231 MaskInfo.ImmAllZerosV))], 1232 DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ, 1233 EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>; 1234 1235 let Constraints = "$src0 = $dst", 1236 isConvertibleToThreeAddress = IsConvertibleToThreeAddress in 1237 def rmk : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst), 1238 (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask, 1239 SrcInfo.ScalarMemOp:$src), 1240 !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|", 1241 "${dst} {${mask}}, $src}"), 1242 [(set MaskInfo.RC:$dst, 1243 (vselect_mask MaskInfo.KRCWM:$mask, 1244 (MaskInfo.VT 1245 (bitconvert 1246 (DestInfo.VT 1247 (SrcInfo.BroadcastLdFrag addr:$src)))), 1248 MaskInfo.RC:$src0))], 1249 DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K, 1250 EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>; 1251} 1252 1253// Helper class to force mask and broadcast result to same type. 1254multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr, string Name, 1255 SchedWrite SchedRR, SchedWrite SchedRM, 1256 X86VectorVTInfo DestInfo, 1257 X86VectorVTInfo SrcInfo, 1258 bit IsConvertibleToThreeAddress> : 1259 avx512_broadcast_rm_split<opc, OpcodeStr, Name, SchedRR, SchedRM, 1260 DestInfo, DestInfo, SrcInfo, 1261 IsConvertibleToThreeAddress>; 1262 1263multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr, 1264 AVX512VLVectorVTInfo _> { 1265 let Predicates = [HasAVX512] in { 1266 defm Z : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256, 1267 WriteFShuffle256Ld, _.info512, _.info128, 1>, 1268 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512, 1269 _.info128>, 1270 EVEX_V512; 1271 } 1272 1273 let Predicates = [HasVLX] in { 1274 defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256, 1275 WriteFShuffle256Ld, _.info256, _.info128, 1>, 1276 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256, 1277 _.info128>, 1278 EVEX_V256; 1279 } 1280} 1281 1282multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr, 1283 AVX512VLVectorVTInfo _> { 1284 let Predicates = [HasAVX512] in { 1285 defm Z : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256, 1286 WriteFShuffle256Ld, _.info512, _.info128, 1>, 1287 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512, 1288 _.info128>, 1289 EVEX_V512; 1290 } 1291 1292 let Predicates = [HasVLX] in { 1293 defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256, 1294 WriteFShuffle256Ld, _.info256, _.info128, 1>, 1295 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256, 1296 _.info128>, 1297 EVEX_V256; 1298 defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256, 1299 WriteFShuffle256Ld, _.info128, _.info128, 1>, 1300 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info128, 1301 _.info128>, 1302 EVEX_V128; 1303 } 1304} 1305defm VBROADCASTSS : avx512_fp_broadcast_ss<0x18, "vbroadcastss", 1306 avx512vl_f32_info>; 1307defm VBROADCASTSD : avx512_fp_broadcast_sd<0x19, "vbroadcastsd", 1308 avx512vl_f64_info>, VEX_W1X; 1309 1310multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR, 1311 X86VectorVTInfo _, SDPatternOperator OpNode, 1312 RegisterClass SrcRC> { 1313 // Fold with a mask even if it has multiple uses since it is cheap. 1314 let ExeDomain = _.ExeDomain in 1315 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 1316 (ins SrcRC:$src), 1317 "vpbroadcast"#_.Suffix, "$src", "$src", 1318 (_.VT (OpNode SrcRC:$src)), /*IsCommutable*/0, 1319 /*IsKCommutable*/0, /*IsKZCommutable*/0, vselect>, 1320 T8PD, EVEX, Sched<[SchedRR]>; 1321} 1322 1323multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite SchedRR, 1324 X86VectorVTInfo _, SDPatternOperator OpNode, 1325 RegisterClass SrcRC, SubRegIndex Subreg> { 1326 let hasSideEffects = 0, ExeDomain = _.ExeDomain in 1327 defm rr : AVX512_maskable_custom<opc, MRMSrcReg, 1328 (outs _.RC:$dst), (ins GR32:$src), 1329 !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)), 1330 !con((ins _.KRCWM:$mask), (ins GR32:$src)), 1331 "vpbroadcast"#_.Suffix, "$src", "$src", [], [], [], 1332 "$src0 = $dst">, T8PD, EVEX, Sched<[SchedRR]>; 1333 1334 def : Pat <(_.VT (OpNode SrcRC:$src)), 1335 (!cast<Instruction>(Name#rr) 1336 (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>; 1337 1338 // Fold with a mask even if it has multiple uses since it is cheap. 1339 def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0), 1340 (!cast<Instruction>(Name#rrk) _.RC:$src0, _.KRCWM:$mask, 1341 (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>; 1342 1343 def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV), 1344 (!cast<Instruction>(Name#rrkz) _.KRCWM:$mask, 1345 (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>; 1346} 1347 1348multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name, 1349 AVX512VLVectorVTInfo _, SDPatternOperator OpNode, 1350 RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> { 1351 let Predicates = [prd] in 1352 defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, WriteShuffle256, _.info512, 1353 OpNode, SrcRC, Subreg>, EVEX_V512; 1354 let Predicates = [prd, HasVLX] in { 1355 defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, WriteShuffle256, 1356 _.info256, OpNode, SrcRC, Subreg>, EVEX_V256; 1357 defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, WriteShuffle, 1358 _.info128, OpNode, SrcRC, Subreg>, EVEX_V128; 1359 } 1360} 1361 1362multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _, 1363 SDPatternOperator OpNode, 1364 RegisterClass SrcRC, Predicate prd> { 1365 let Predicates = [prd] in 1366 defm Z : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info512, OpNode, 1367 SrcRC>, EVEX_V512; 1368 let Predicates = [prd, HasVLX] in { 1369 defm Z256 : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info256, OpNode, 1370 SrcRC>, EVEX_V256; 1371 defm Z128 : avx512_int_broadcast_reg<opc, WriteShuffle, _.info128, OpNode, 1372 SrcRC>, EVEX_V128; 1373 } 1374} 1375 1376defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr", 1377 avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>; 1378defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr", 1379 avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit, 1380 HasBWI>; 1381defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info, 1382 X86VBroadcast, GR32, HasAVX512>; 1383defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info, 1384 X86VBroadcast, GR64, HasAVX512>, VEX_W; 1385 1386multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr, 1387 AVX512VLVectorVTInfo _, Predicate prd, 1388 bit IsConvertibleToThreeAddress> { 1389 let Predicates = [prd] in { 1390 defm Z : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256, 1391 WriteShuffle256Ld, _.info512, _.info128, 1392 IsConvertibleToThreeAddress>, 1393 EVEX_V512; 1394 } 1395 let Predicates = [prd, HasVLX] in { 1396 defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256, 1397 WriteShuffle256Ld, _.info256, _.info128, 1398 IsConvertibleToThreeAddress>, 1399 EVEX_V256; 1400 defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle, 1401 WriteShuffleXLd, _.info128, _.info128, 1402 IsConvertibleToThreeAddress>, 1403 EVEX_V128; 1404 } 1405} 1406 1407defm VPBROADCASTB : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb", 1408 avx512vl_i8_info, HasBWI, 0>; 1409defm VPBROADCASTW : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw", 1410 avx512vl_i16_info, HasBWI, 0>; 1411defm VPBROADCASTD : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd", 1412 avx512vl_i32_info, HasAVX512, 1>; 1413defm VPBROADCASTQ : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq", 1414 avx512vl_i64_info, HasAVX512, 1>, VEX_W1X; 1415 1416multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr, 1417 SDPatternOperator OpNode, 1418 X86VectorVTInfo _Dst, 1419 X86VectorVTInfo _Src> { 1420 defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst), 1421 (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src", 1422 (_Dst.VT (OpNode addr:$src))>, 1423 Sched<[SchedWriteShuffle.YMM.Folded]>, 1424 AVX5128IBase, EVEX; 1425} 1426 1427// This should be used for the AVX512DQ broadcast instructions. It disables 1428// the unmasked patterns so that we only use the DQ instructions when masking 1429// is requested. 1430multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr, 1431 SDPatternOperator OpNode, 1432 X86VectorVTInfo _Dst, 1433 X86VectorVTInfo _Src> { 1434 let hasSideEffects = 0, mayLoad = 1 in 1435 defm rm : AVX512_maskable_split<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst), 1436 (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src", 1437 (null_frag), 1438 (_Dst.VT (OpNode addr:$src))>, 1439 Sched<[SchedWriteShuffle.YMM.Folded]>, 1440 AVX5128IBase, EVEX; 1441} 1442 1443//===----------------------------------------------------------------------===// 1444// AVX-512 BROADCAST SUBVECTORS 1445// 1446 1447defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4", 1448 X86SubVBroadcastld128, v16i32_info, v4i32x_info>, 1449 EVEX_V512, EVEX_CD8<32, CD8VT4>; 1450defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4", 1451 X86SubVBroadcastld128, v16f32_info, v4f32x_info>, 1452 EVEX_V512, EVEX_CD8<32, CD8VT4>; 1453defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4", 1454 X86SubVBroadcastld256, v8i64_info, v4i64x_info>, VEX_W, 1455 EVEX_V512, EVEX_CD8<64, CD8VT4>; 1456defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4", 1457 X86SubVBroadcastld256, v8f64_info, v4f64x_info>, VEX_W, 1458 EVEX_V512, EVEX_CD8<64, CD8VT4>; 1459 1460let Predicates = [HasAVX512] in { 1461def : Pat<(v8f64 (X86SubVBroadcastld256 addr:$src)), 1462 (VBROADCASTF64X4rm addr:$src)>; 1463def : Pat<(v16f32 (X86SubVBroadcastld256 addr:$src)), 1464 (VBROADCASTF64X4rm addr:$src)>; 1465def : Pat<(v8i64 (X86SubVBroadcastld256 addr:$src)), 1466 (VBROADCASTI64X4rm addr:$src)>; 1467def : Pat<(v16i32 (X86SubVBroadcastld256 addr:$src)), 1468 (VBROADCASTI64X4rm addr:$src)>; 1469def : Pat<(v32i16 (X86SubVBroadcastld256 addr:$src)), 1470 (VBROADCASTI64X4rm addr:$src)>; 1471def : Pat<(v64i8 (X86SubVBroadcastld256 addr:$src)), 1472 (VBROADCASTI64X4rm addr:$src)>; 1473 1474def : Pat<(v8f64 (X86SubVBroadcastld128 addr:$src)), 1475 (VBROADCASTF32X4rm addr:$src)>; 1476def : Pat<(v16f32 (X86SubVBroadcastld128 addr:$src)), 1477 (VBROADCASTF32X4rm addr:$src)>; 1478def : Pat<(v8i64 (X86SubVBroadcastld128 addr:$src)), 1479 (VBROADCASTI32X4rm addr:$src)>; 1480def : Pat<(v16i32 (X86SubVBroadcastld128 addr:$src)), 1481 (VBROADCASTI32X4rm addr:$src)>; 1482def : Pat<(v32i16 (X86SubVBroadcastld128 addr:$src)), 1483 (VBROADCASTI32X4rm addr:$src)>; 1484def : Pat<(v64i8 (X86SubVBroadcastld128 addr:$src)), 1485 (VBROADCASTI32X4rm addr:$src)>; 1486 1487// Patterns for selects of bitcasted operations. 1488def : Pat<(vselect_mask VK16WM:$mask, 1489 (bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))), 1490 (v16f32 immAllZerosV)), 1491 (VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>; 1492def : Pat<(vselect_mask VK16WM:$mask, 1493 (bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))), 1494 VR512:$src0), 1495 (VBROADCASTF32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>; 1496def : Pat<(vselect_mask VK16WM:$mask, 1497 (bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))), 1498 (v16i32 immAllZerosV)), 1499 (VBROADCASTI32X4rmkz VK16WM:$mask, addr:$src)>; 1500def : Pat<(vselect_mask VK16WM:$mask, 1501 (bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))), 1502 VR512:$src0), 1503 (VBROADCASTI32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>; 1504 1505def : Pat<(vselect_mask VK8WM:$mask, 1506 (bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))), 1507 (v8f64 immAllZerosV)), 1508 (VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>; 1509def : Pat<(vselect_mask VK8WM:$mask, 1510 (bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))), 1511 VR512:$src0), 1512 (VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>; 1513def : Pat<(vselect_mask VK8WM:$mask, 1514 (bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))), 1515 (v8i64 immAllZerosV)), 1516 (VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>; 1517def : Pat<(vselect_mask VK8WM:$mask, 1518 (bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))), 1519 VR512:$src0), 1520 (VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>; 1521} 1522 1523let Predicates = [HasVLX] in { 1524defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4", 1525 X86SubVBroadcastld128, v8i32x_info, v4i32x_info>, 1526 EVEX_V256, EVEX_CD8<32, CD8VT4>; 1527defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4", 1528 X86SubVBroadcastld128, v8f32x_info, v4f32x_info>, 1529 EVEX_V256, EVEX_CD8<32, CD8VT4>; 1530 1531def : Pat<(v4f64 (X86SubVBroadcastld128 addr:$src)), 1532 (VBROADCASTF32X4Z256rm addr:$src)>; 1533def : Pat<(v8f32 (X86SubVBroadcastld128 addr:$src)), 1534 (VBROADCASTF32X4Z256rm addr:$src)>; 1535def : Pat<(v4i64 (X86SubVBroadcastld128 addr:$src)), 1536 (VBROADCASTI32X4Z256rm addr:$src)>; 1537def : Pat<(v8i32 (X86SubVBroadcastld128 addr:$src)), 1538 (VBROADCASTI32X4Z256rm addr:$src)>; 1539def : Pat<(v16i16 (X86SubVBroadcastld128 addr:$src)), 1540 (VBROADCASTI32X4Z256rm addr:$src)>; 1541def : Pat<(v32i8 (X86SubVBroadcastld128 addr:$src)), 1542 (VBROADCASTI32X4Z256rm addr:$src)>; 1543 1544// Patterns for selects of bitcasted operations. 1545def : Pat<(vselect_mask VK8WM:$mask, 1546 (bc_v8f32 (v4f64 (X86SubVBroadcastld128 addr:$src))), 1547 (v8f32 immAllZerosV)), 1548 (VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>; 1549def : Pat<(vselect_mask VK8WM:$mask, 1550 (bc_v8f32 (v4f64 (X86SubVBroadcastld128 addr:$src))), 1551 VR256X:$src0), 1552 (VBROADCASTF32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>; 1553def : Pat<(vselect_mask VK8WM:$mask, 1554 (bc_v8i32 (v4i64 (X86SubVBroadcastld128 addr:$src))), 1555 (v8i32 immAllZerosV)), 1556 (VBROADCASTI32X4Z256rmkz VK8WM:$mask, addr:$src)>; 1557def : Pat<(vselect_mask VK8WM:$mask, 1558 (bc_v8i32 (v4i64 (X86SubVBroadcastld128 addr:$src))), 1559 VR256X:$src0), 1560 (VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>; 1561} 1562 1563let Predicates = [HasVLX, HasDQI] in { 1564defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2", 1565 X86SubVBroadcastld128, v4i64x_info, v2i64x_info>, VEX_W1X, 1566 EVEX_V256, EVEX_CD8<64, CD8VT2>; 1567defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2", 1568 X86SubVBroadcastld128, v4f64x_info, v2f64x_info>, VEX_W1X, 1569 EVEX_V256, EVEX_CD8<64, CD8VT2>; 1570 1571// Patterns for selects of bitcasted operations. 1572def : Pat<(vselect_mask VK4WM:$mask, 1573 (bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))), 1574 (v4f64 immAllZerosV)), 1575 (VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>; 1576def : Pat<(vselect_mask VK4WM:$mask, 1577 (bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))), 1578 VR256X:$src0), 1579 (VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>; 1580def : Pat<(vselect_mask VK4WM:$mask, 1581 (bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))), 1582 (v4i64 immAllZerosV)), 1583 (VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>; 1584def : Pat<(vselect_mask VK4WM:$mask, 1585 (bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))), 1586 VR256X:$src0), 1587 (VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>; 1588} 1589 1590let Predicates = [HasDQI] in { 1591defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2", 1592 X86SubVBroadcastld128, v8i64_info, v2i64x_info>, VEX_W, 1593 EVEX_V512, EVEX_CD8<64, CD8VT2>; 1594defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8", 1595 X86SubVBroadcastld256, v16i32_info, v8i32x_info>, 1596 EVEX_V512, EVEX_CD8<32, CD8VT8>; 1597defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2", 1598 X86SubVBroadcastld128, v8f64_info, v2f64x_info>, VEX_W, 1599 EVEX_V512, EVEX_CD8<64, CD8VT2>; 1600defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8", 1601 X86SubVBroadcastld256, v16f32_info, v8f32x_info>, 1602 EVEX_V512, EVEX_CD8<32, CD8VT8>; 1603 1604// Patterns for selects of bitcasted operations. 1605def : Pat<(vselect_mask VK16WM:$mask, 1606 (bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))), 1607 (v16f32 immAllZerosV)), 1608 (VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>; 1609def : Pat<(vselect_mask VK16WM:$mask, 1610 (bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))), 1611 VR512:$src0), 1612 (VBROADCASTF32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>; 1613def : Pat<(vselect_mask VK16WM:$mask, 1614 (bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))), 1615 (v16i32 immAllZerosV)), 1616 (VBROADCASTI32X8rmkz VK16WM:$mask, addr:$src)>; 1617def : Pat<(vselect_mask VK16WM:$mask, 1618 (bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))), 1619 VR512:$src0), 1620 (VBROADCASTI32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>; 1621 1622def : Pat<(vselect_mask VK8WM:$mask, 1623 (bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))), 1624 (v8f64 immAllZerosV)), 1625 (VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>; 1626def : Pat<(vselect_mask VK8WM:$mask, 1627 (bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))), 1628 VR512:$src0), 1629 (VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>; 1630def : Pat<(vselect_mask VK8WM:$mask, 1631 (bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))), 1632 (v8i64 immAllZerosV)), 1633 (VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>; 1634def : Pat<(vselect_mask VK8WM:$mask, 1635 (bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))), 1636 VR512:$src0), 1637 (VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>; 1638} 1639 1640multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr, 1641 AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> { 1642 let Predicates = [HasDQI] in 1643 defm Z : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256, 1644 WriteShuffle256Ld, _Dst.info512, 1645 _Src.info512, _Src.info128, 0, null_frag, null_frag>, 1646 EVEX_V512; 1647 let Predicates = [HasDQI, HasVLX] in 1648 defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256, 1649 WriteShuffle256Ld, _Dst.info256, 1650 _Src.info256, _Src.info128, 0, null_frag, null_frag>, 1651 EVEX_V256; 1652} 1653 1654multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr, 1655 AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> : 1656 avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> { 1657 1658 let Predicates = [HasDQI, HasVLX] in 1659 defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle, 1660 WriteShuffleXLd, _Dst.info128, 1661 _Src.info128, _Src.info128, 0, null_frag, null_frag>, 1662 EVEX_V128; 1663} 1664 1665defm VBROADCASTI32X2 : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2", 1666 avx512vl_i32_info, avx512vl_i64_info>; 1667defm VBROADCASTF32X2 : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2", 1668 avx512vl_f32_info, avx512vl_f64_info>; 1669 1670//===----------------------------------------------------------------------===// 1671// AVX-512 BROADCAST MASK TO VECTOR REGISTER 1672//--- 1673multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr, 1674 X86VectorVTInfo _, RegisterClass KRC> { 1675 def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.RC:$dst), (ins KRC:$src), 1676 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 1677 [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))]>, 1678 EVEX, Sched<[WriteShuffle]>; 1679} 1680 1681multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr, 1682 AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> { 1683 let Predicates = [HasCDI] in 1684 defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>, EVEX_V512; 1685 let Predicates = [HasCDI, HasVLX] in { 1686 defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>, EVEX_V256; 1687 defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>, EVEX_V128; 1688 } 1689} 1690 1691defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d", 1692 avx512vl_i32_info, VK16>; 1693defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q", 1694 avx512vl_i64_info, VK8>, VEX_W; 1695 1696//===----------------------------------------------------------------------===// 1697// -- VPERMI2 - 3 source operands form -- 1698multiclass avx512_perm_i<bits<8> opc, string OpcodeStr, 1699 X86FoldableSchedWrite sched, 1700 X86VectorVTInfo _, X86VectorVTInfo IdxVT> { 1701let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, 1702 hasSideEffects = 0 in { 1703 defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst), 1704 (ins _.RC:$src2, _.RC:$src3), 1705 OpcodeStr, "$src3, $src2", "$src2, $src3", 1706 (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, _.RC:$src3)), 1>, 1707 EVEX_4V, AVX5128IBase, Sched<[sched]>; 1708 1709 let mayLoad = 1 in 1710 defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst), 1711 (ins _.RC:$src2, _.MemOp:$src3), 1712 OpcodeStr, "$src3, $src2", "$src2, $src3", 1713 (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, 1714 (_.VT (_.LdFrag addr:$src3)))), 1>, 1715 EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>; 1716 } 1717} 1718 1719multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr, 1720 X86FoldableSchedWrite sched, 1721 X86VectorVTInfo _, X86VectorVTInfo IdxVT> { 1722 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, 1723 hasSideEffects = 0, mayLoad = 1 in 1724 defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst), 1725 (ins _.RC:$src2, _.ScalarMemOp:$src3), 1726 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), 1727 !strconcat("$src2, ${src3}", _.BroadcastStr ), 1728 (_.VT (X86VPermt2 _.RC:$src2, 1729 IdxVT.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>, 1730 AVX5128IBase, EVEX_4V, EVEX_B, 1731 Sched<[sched.Folded, sched.ReadAfterFold]>; 1732} 1733 1734multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr, 1735 X86FoldableSchedWrite sched, 1736 AVX512VLVectorVTInfo VTInfo, 1737 AVX512VLVectorVTInfo ShuffleMask> { 1738 defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512, 1739 ShuffleMask.info512>, 1740 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info512, 1741 ShuffleMask.info512>, EVEX_V512; 1742 let Predicates = [HasVLX] in { 1743 defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128, 1744 ShuffleMask.info128>, 1745 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info128, 1746 ShuffleMask.info128>, EVEX_V128; 1747 defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256, 1748 ShuffleMask.info256>, 1749 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info256, 1750 ShuffleMask.info256>, EVEX_V256; 1751 } 1752} 1753 1754multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr, 1755 X86FoldableSchedWrite sched, 1756 AVX512VLVectorVTInfo VTInfo, 1757 AVX512VLVectorVTInfo Idx, 1758 Predicate Prd> { 1759 let Predicates = [Prd] in 1760 defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512, 1761 Idx.info512>, EVEX_V512; 1762 let Predicates = [Prd, HasVLX] in { 1763 defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128, 1764 Idx.info128>, EVEX_V128; 1765 defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256, 1766 Idx.info256>, EVEX_V256; 1767 } 1768} 1769 1770defm VPERMI2D : avx512_perm_i_sizes<0x76, "vpermi2d", WriteVarShuffle256, 1771 avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; 1772defm VPERMI2Q : avx512_perm_i_sizes<0x76, "vpermi2q", WriteVarShuffle256, 1773 avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>; 1774defm VPERMI2W : avx512_perm_i_sizes_bw<0x75, "vpermi2w", WriteVarShuffle256, 1775 avx512vl_i16_info, avx512vl_i16_info, HasBWI>, 1776 VEX_W, EVEX_CD8<16, CD8VF>; 1777defm VPERMI2B : avx512_perm_i_sizes_bw<0x75, "vpermi2b", WriteVarShuffle256, 1778 avx512vl_i8_info, avx512vl_i8_info, HasVBMI>, 1779 EVEX_CD8<8, CD8VF>; 1780defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", WriteFVarShuffle256, 1781 avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; 1782defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", WriteFVarShuffle256, 1783 avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>; 1784 1785// Extra patterns to deal with extra bitcasts due to passthru and index being 1786// different types on the fp versions. 1787multiclass avx512_perm_i_lowering<string InstrStr, X86VectorVTInfo _, 1788 X86VectorVTInfo IdxVT, 1789 X86VectorVTInfo CastVT> { 1790 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 1791 (X86VPermt2 (_.VT _.RC:$src2), 1792 (IdxVT.VT (bitconvert 1793 (CastVT.VT _.RC:$src1))), 1794 _.RC:$src3), 1795 (_.VT (bitconvert (CastVT.VT _.RC:$src1))))), 1796 (!cast<Instruction>(InstrStr#"rrk") _.RC:$src1, _.KRCWM:$mask, 1797 _.RC:$src2, _.RC:$src3)>; 1798 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 1799 (X86VPermt2 _.RC:$src2, 1800 (IdxVT.VT (bitconvert 1801 (CastVT.VT _.RC:$src1))), 1802 (_.LdFrag addr:$src3)), 1803 (_.VT (bitconvert (CastVT.VT _.RC:$src1))))), 1804 (!cast<Instruction>(InstrStr#"rmk") _.RC:$src1, _.KRCWM:$mask, 1805 _.RC:$src2, addr:$src3)>; 1806 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 1807 (X86VPermt2 _.RC:$src2, 1808 (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))), 1809 (_.BroadcastLdFrag addr:$src3)), 1810 (_.VT (bitconvert (CastVT.VT _.RC:$src1))))), 1811 (!cast<Instruction>(InstrStr#"rmbk") _.RC:$src1, _.KRCWM:$mask, 1812 _.RC:$src2, addr:$src3)>; 1813} 1814 1815// TODO: Should we add more casts? The vXi64 case is common due to ABI. 1816defm : avx512_perm_i_lowering<"VPERMI2PS", v16f32_info, v16i32_info, v8i64_info>; 1817defm : avx512_perm_i_lowering<"VPERMI2PS256", v8f32x_info, v8i32x_info, v4i64x_info>; 1818defm : avx512_perm_i_lowering<"VPERMI2PS128", v4f32x_info, v4i32x_info, v2i64x_info>; 1819 1820// VPERMT2 1821multiclass avx512_perm_t<bits<8> opc, string OpcodeStr, 1822 X86FoldableSchedWrite sched, 1823 X86VectorVTInfo _, X86VectorVTInfo IdxVT> { 1824let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in { 1825 defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), 1826 (ins IdxVT.RC:$src2, _.RC:$src3), 1827 OpcodeStr, "$src3, $src2", "$src2, $src3", 1828 (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)), 1>, 1829 EVEX_4V, AVX5128IBase, Sched<[sched]>; 1830 1831 defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 1832 (ins IdxVT.RC:$src2, _.MemOp:$src3), 1833 OpcodeStr, "$src3, $src2", "$src2, $src3", 1834 (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, 1835 (_.LdFrag addr:$src3))), 1>, 1836 EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>; 1837 } 1838} 1839multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr, 1840 X86FoldableSchedWrite sched, 1841 X86VectorVTInfo _, X86VectorVTInfo IdxVT> { 1842 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in 1843 defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 1844 (ins IdxVT.RC:$src2, _.ScalarMemOp:$src3), 1845 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), 1846 !strconcat("$src2, ${src3}", _.BroadcastStr ), 1847 (_.VT (X86VPermt2 _.RC:$src1, 1848 IdxVT.RC:$src2,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>, 1849 AVX5128IBase, EVEX_4V, EVEX_B, 1850 Sched<[sched.Folded, sched.ReadAfterFold]>; 1851} 1852 1853multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr, 1854 X86FoldableSchedWrite sched, 1855 AVX512VLVectorVTInfo VTInfo, 1856 AVX512VLVectorVTInfo ShuffleMask> { 1857 defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512, 1858 ShuffleMask.info512>, 1859 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info512, 1860 ShuffleMask.info512>, EVEX_V512; 1861 let Predicates = [HasVLX] in { 1862 defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128, 1863 ShuffleMask.info128>, 1864 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info128, 1865 ShuffleMask.info128>, EVEX_V128; 1866 defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256, 1867 ShuffleMask.info256>, 1868 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info256, 1869 ShuffleMask.info256>, EVEX_V256; 1870 } 1871} 1872 1873multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr, 1874 X86FoldableSchedWrite sched, 1875 AVX512VLVectorVTInfo VTInfo, 1876 AVX512VLVectorVTInfo Idx, Predicate Prd> { 1877 let Predicates = [Prd] in 1878 defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512, 1879 Idx.info512>, EVEX_V512; 1880 let Predicates = [Prd, HasVLX] in { 1881 defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128, 1882 Idx.info128>, EVEX_V128; 1883 defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256, 1884 Idx.info256>, EVEX_V256; 1885 } 1886} 1887 1888defm VPERMT2D : avx512_perm_t_sizes<0x7E, "vpermt2d", WriteVarShuffle256, 1889 avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; 1890defm VPERMT2Q : avx512_perm_t_sizes<0x7E, "vpermt2q", WriteVarShuffle256, 1891 avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>; 1892defm VPERMT2W : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", WriteVarShuffle256, 1893 avx512vl_i16_info, avx512vl_i16_info, HasBWI>, 1894 VEX_W, EVEX_CD8<16, CD8VF>; 1895defm VPERMT2B : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", WriteVarShuffle256, 1896 avx512vl_i8_info, avx512vl_i8_info, HasVBMI>, 1897 EVEX_CD8<8, CD8VF>; 1898defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", WriteFVarShuffle256, 1899 avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; 1900defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", WriteFVarShuffle256, 1901 avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>; 1902 1903//===----------------------------------------------------------------------===// 1904// AVX-512 - BLEND using mask 1905// 1906 1907multiclass WriteFVarBlendask<bits<8> opc, string OpcodeStr, 1908 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 1909 let ExeDomain = _.ExeDomain, hasSideEffects = 0 in { 1910 def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst), 1911 (ins _.RC:$src1, _.RC:$src2), 1912 !strconcat(OpcodeStr, 1913 "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), []>, 1914 EVEX_4V, Sched<[sched]>; 1915 def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst), 1916 (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2), 1917 !strconcat(OpcodeStr, 1918 "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"), 1919 []>, EVEX_4V, EVEX_K, Sched<[sched]>; 1920 def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst), 1921 (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2), 1922 !strconcat(OpcodeStr, 1923 "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"), 1924 []>, EVEX_4V, EVEX_KZ, Sched<[sched]>, NotMemoryFoldable; 1925 let mayLoad = 1 in { 1926 def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst), 1927 (ins _.RC:$src1, _.MemOp:$src2), 1928 !strconcat(OpcodeStr, 1929 "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), 1930 []>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, 1931 Sched<[sched.Folded, sched.ReadAfterFold]>; 1932 def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst), 1933 (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2), 1934 !strconcat(OpcodeStr, 1935 "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"), 1936 []>, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>, 1937 Sched<[sched.Folded, sched.ReadAfterFold]>; 1938 def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst), 1939 (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2), 1940 !strconcat(OpcodeStr, 1941 "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"), 1942 []>, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>, 1943 Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable; 1944 } 1945 } 1946} 1947multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr, 1948 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 1949 let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in { 1950 def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst), 1951 (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2), 1952 !strconcat(OpcodeStr, 1953 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|", 1954 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), []>, 1955 EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, 1956 Sched<[sched.Folded, sched.ReadAfterFold]>; 1957 1958 def rmbkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst), 1959 (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2), 1960 !strconcat(OpcodeStr, 1961 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}} {z}|", 1962 "$dst {${mask}} {z}, $src1, ${src2}", _.BroadcastStr, "}"), []>, 1963 EVEX_4V, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, 1964 Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable; 1965 1966 def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst), 1967 (ins _.RC:$src1, _.ScalarMemOp:$src2), 1968 !strconcat(OpcodeStr, 1969 "\t{${src2}", _.BroadcastStr, ", $src1, $dst|", 1970 "$dst, $src1, ${src2}", _.BroadcastStr, "}"), []>, 1971 EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, 1972 Sched<[sched.Folded, sched.ReadAfterFold]>; 1973 } 1974} 1975 1976multiclass blendmask_dq<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched, 1977 AVX512VLVectorVTInfo VTInfo> { 1978 defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>, 1979 WriteFVarBlendask_rmb<opc, OpcodeStr, sched.ZMM, VTInfo.info512>, 1980 EVEX_V512; 1981 1982 let Predicates = [HasVLX] in { 1983 defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>, 1984 WriteFVarBlendask_rmb<opc, OpcodeStr, sched.YMM, VTInfo.info256>, 1985 EVEX_V256; 1986 defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>, 1987 WriteFVarBlendask_rmb<opc, OpcodeStr, sched.XMM, VTInfo.info128>, 1988 EVEX_V128; 1989 } 1990} 1991 1992multiclass blendmask_bw<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched, 1993 AVX512VLVectorVTInfo VTInfo> { 1994 let Predicates = [HasBWI] in 1995 defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>, 1996 EVEX_V512; 1997 1998 let Predicates = [HasBWI, HasVLX] in { 1999 defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>, 2000 EVEX_V256; 2001 defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>, 2002 EVEX_V128; 2003 } 2004} 2005 2006defm VBLENDMPS : blendmask_dq<0x65, "vblendmps", SchedWriteFVarBlend, 2007 avx512vl_f32_info>; 2008defm VBLENDMPD : blendmask_dq<0x65, "vblendmpd", SchedWriteFVarBlend, 2009 avx512vl_f64_info>, VEX_W; 2010defm VPBLENDMD : blendmask_dq<0x64, "vpblendmd", SchedWriteVarBlend, 2011 avx512vl_i32_info>; 2012defm VPBLENDMQ : blendmask_dq<0x64, "vpblendmq", SchedWriteVarBlend, 2013 avx512vl_i64_info>, VEX_W; 2014defm VPBLENDMB : blendmask_bw<0x66, "vpblendmb", SchedWriteVarBlend, 2015 avx512vl_i8_info>; 2016defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", SchedWriteVarBlend, 2017 avx512vl_i16_info>, VEX_W; 2018 2019//===----------------------------------------------------------------------===// 2020// Compare Instructions 2021//===----------------------------------------------------------------------===// 2022 2023// avx512_cmp_scalar - AVX512 CMPSS and CMPSD 2024 2025multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE, 2026 PatFrag OpNode_su, PatFrag OpNodeSAE_su, 2027 X86FoldableSchedWrite sched> { 2028 defm rr_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _, 2029 (outs _.KRC:$dst), 2030 (ins _.RC:$src1, _.RC:$src2, u8imm:$cc), 2031 "vcmp"#_.Suffix, 2032 "$cc, $src2, $src1", "$src1, $src2, $cc", 2033 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc), 2034 (OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), 2035 timm:$cc)>, EVEX_4V, VEX_LIG, Sched<[sched]>, SIMD_EXC; 2036 let mayLoad = 1 in 2037 defm rm_Int : AVX512_maskable_cmp<0xC2, MRMSrcMem, _, 2038 (outs _.KRC:$dst), 2039 (ins _.RC:$src1, _.IntScalarMemOp:$src2, u8imm:$cc), 2040 "vcmp"#_.Suffix, 2041 "$cc, $src2, $src1", "$src1, $src2, $cc", 2042 (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2), 2043 timm:$cc), 2044 (OpNode_su (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2), 2045 timm:$cc)>, EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>, 2046 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; 2047 2048 let Uses = [MXCSR] in 2049 defm rrb_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _, 2050 (outs _.KRC:$dst), 2051 (ins _.RC:$src1, _.RC:$src2, u8imm:$cc), 2052 "vcmp"#_.Suffix, 2053 "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc", 2054 (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2), 2055 timm:$cc), 2056 (OpNodeSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), 2057 timm:$cc)>, 2058 EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>; 2059 2060 let isCodeGenOnly = 1 in { 2061 let isCommutable = 1 in 2062 def rr : AVX512Ii8<0xC2, MRMSrcReg, 2063 (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, u8imm:$cc), 2064 !strconcat("vcmp", _.Suffix, 2065 "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), 2066 [(set _.KRC:$dst, (OpNode _.FRC:$src1, 2067 _.FRC:$src2, 2068 timm:$cc))]>, 2069 EVEX_4V, VEX_LIG, Sched<[sched]>, SIMD_EXC; 2070 def rm : AVX512Ii8<0xC2, MRMSrcMem, 2071 (outs _.KRC:$dst), 2072 (ins _.FRC:$src1, _.ScalarMemOp:$src2, u8imm:$cc), 2073 !strconcat("vcmp", _.Suffix, 2074 "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), 2075 [(set _.KRC:$dst, (OpNode _.FRC:$src1, 2076 (_.ScalarLdFrag addr:$src2), 2077 timm:$cc))]>, 2078 EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>, 2079 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; 2080 } 2081} 2082 2083def X86cmpms_su : PatFrag<(ops node:$src1, node:$src2, node:$cc), 2084 (X86cmpms node:$src1, node:$src2, node:$cc), [{ 2085 return N->hasOneUse(); 2086}]>; 2087def X86cmpmsSAE_su : PatFrag<(ops node:$src1, node:$src2, node:$cc), 2088 (X86cmpmsSAE node:$src1, node:$src2, node:$cc), [{ 2089 return N->hasOneUse(); 2090}]>; 2091 2092let Predicates = [HasAVX512] in { 2093 let ExeDomain = SSEPackedSingle in 2094 defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsSAE, 2095 X86cmpms_su, X86cmpmsSAE_su, 2096 SchedWriteFCmp.Scl>, AVX512XSIi8Base; 2097 let ExeDomain = SSEPackedDouble in 2098 defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsSAE, 2099 X86cmpms_su, X86cmpmsSAE_su, 2100 SchedWriteFCmp.Scl>, AVX512XDIi8Base, VEX_W; 2101} 2102 2103multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, 2104 X86FoldableSchedWrite sched, 2105 X86VectorVTInfo _, bit IsCommutable> { 2106 let isCommutable = IsCommutable, hasSideEffects = 0 in 2107 def rr : AVX512BI<opc, MRMSrcReg, 2108 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2), 2109 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 2110 []>, EVEX_4V, Sched<[sched]>; 2111 let mayLoad = 1, hasSideEffects = 0 in 2112 def rm : AVX512BI<opc, MRMSrcMem, 2113 (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2), 2114 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 2115 []>, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>; 2116 let isCommutable = IsCommutable, hasSideEffects = 0 in 2117 def rrk : AVX512BI<opc, MRMSrcReg, 2118 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2), 2119 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|", 2120 "$dst {${mask}}, $src1, $src2}"), 2121 []>, EVEX_4V, EVEX_K, Sched<[sched]>; 2122 let mayLoad = 1, hasSideEffects = 0 in 2123 def rmk : AVX512BI<opc, MRMSrcMem, 2124 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2), 2125 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|", 2126 "$dst {${mask}}, $src1, $src2}"), 2127 []>, EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>; 2128} 2129 2130multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, 2131 X86FoldableSchedWrite sched, X86VectorVTInfo _, 2132 bit IsCommutable> : 2133 avx512_icmp_packed<opc, OpcodeStr, sched, _, IsCommutable> { 2134 let mayLoad = 1, hasSideEffects = 0 in { 2135 def rmb : AVX512BI<opc, MRMSrcMem, 2136 (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2), 2137 !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst", 2138 "|$dst, $src1, ${src2}", _.BroadcastStr, "}"), 2139 []>, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 2140 def rmbk : AVX512BI<opc, MRMSrcMem, 2141 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, 2142 _.ScalarMemOp:$src2), 2143 !strconcat(OpcodeStr, 2144 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|", 2145 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), 2146 []>, EVEX_4V, EVEX_K, EVEX_B, 2147 Sched<[sched.Folded, sched.ReadAfterFold]>; 2148 } 2149} 2150 2151multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr, 2152 X86SchedWriteWidths sched, 2153 AVX512VLVectorVTInfo VTInfo, Predicate prd, 2154 bit IsCommutable = 0> { 2155 let Predicates = [prd] in 2156 defm Z : avx512_icmp_packed<opc, OpcodeStr, sched.ZMM, 2157 VTInfo.info512, IsCommutable>, EVEX_V512; 2158 2159 let Predicates = [prd, HasVLX] in { 2160 defm Z256 : avx512_icmp_packed<opc, OpcodeStr, sched.YMM, 2161 VTInfo.info256, IsCommutable>, EVEX_V256; 2162 defm Z128 : avx512_icmp_packed<opc, OpcodeStr, sched.XMM, 2163 VTInfo.info128, IsCommutable>, EVEX_V128; 2164 } 2165} 2166 2167multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr, 2168 X86SchedWriteWidths sched, 2169 AVX512VLVectorVTInfo VTInfo, 2170 Predicate prd, bit IsCommutable = 0> { 2171 let Predicates = [prd] in 2172 defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.ZMM, 2173 VTInfo.info512, IsCommutable>, EVEX_V512; 2174 2175 let Predicates = [prd, HasVLX] in { 2176 defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.YMM, 2177 VTInfo.info256, IsCommutable>, EVEX_V256; 2178 defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.XMM, 2179 VTInfo.info128, IsCommutable>, EVEX_V128; 2180 } 2181} 2182 2183// This fragment treats X86cmpm as commutable to help match loads in both 2184// operands for PCMPEQ. 2185def X86setcc_commute : SDNode<"ISD::SETCC", SDTSetCC, [SDNPCommutative]>; 2186def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2), 2187 (setcc node:$src1, node:$src2, SETGT)>; 2188 2189// AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't 2190// increase the pattern complexity the way an immediate would. 2191let AddedComplexity = 2 in { 2192// FIXME: Is there a better scheduler class for VPCMP? 2193defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb", 2194 SchedWriteVecALU, avx512vl_i8_info, HasBWI, 1>, 2195 EVEX_CD8<8, CD8VF>, VEX_WIG; 2196 2197defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw", 2198 SchedWriteVecALU, avx512vl_i16_info, HasBWI, 1>, 2199 EVEX_CD8<16, CD8VF>, VEX_WIG; 2200 2201defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd", 2202 SchedWriteVecALU, avx512vl_i32_info, HasAVX512, 1>, 2203 EVEX_CD8<32, CD8VF>; 2204 2205defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq", 2206 SchedWriteVecALU, avx512vl_i64_info, HasAVX512, 1>, 2207 T8PD, VEX_W, EVEX_CD8<64, CD8VF>; 2208 2209defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb", 2210 SchedWriteVecALU, avx512vl_i8_info, HasBWI>, 2211 EVEX_CD8<8, CD8VF>, VEX_WIG; 2212 2213defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw", 2214 SchedWriteVecALU, avx512vl_i16_info, HasBWI>, 2215 EVEX_CD8<16, CD8VF>, VEX_WIG; 2216 2217defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd", 2218 SchedWriteVecALU, avx512vl_i32_info, HasAVX512>, 2219 EVEX_CD8<32, CD8VF>; 2220 2221defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", 2222 SchedWriteVecALU, avx512vl_i64_info, HasAVX512>, 2223 T8PD, VEX_W, EVEX_CD8<64, CD8VF>; 2224} 2225 2226def X86pcmpm_imm : SDNodeXForm<setcc, [{ 2227 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); 2228 uint8_t SSECC = X86::getVPCMPImmForCond(CC); 2229 return getI8Imm(SSECC, SDLoc(N)); 2230}]>; 2231 2232// Swapped operand version of the above. 2233def X86pcmpm_imm_commute : SDNodeXForm<setcc, [{ 2234 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); 2235 uint8_t SSECC = X86::getVPCMPImmForCond(CC); 2236 SSECC = X86::getSwappedVPCMPImm(SSECC); 2237 return getI8Imm(SSECC, SDLoc(N)); 2238}]>; 2239 2240multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag, 2241 PatFrag Frag_su, 2242 X86FoldableSchedWrite sched, 2243 X86VectorVTInfo _, string Name> { 2244 let isCommutable = 1 in 2245 def rri : AVX512AIi8<opc, MRMSrcReg, 2246 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc), 2247 !strconcat("vpcmp", Suffix, 2248 "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), 2249 [(set _.KRC:$dst, (_.KVT (Frag:$cc (_.VT _.RC:$src1), 2250 (_.VT _.RC:$src2), 2251 cond)))]>, 2252 EVEX_4V, Sched<[sched]>; 2253 def rmi : AVX512AIi8<opc, MRMSrcMem, 2254 (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc), 2255 !strconcat("vpcmp", Suffix, 2256 "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), 2257 [(set _.KRC:$dst, (_.KVT 2258 (Frag:$cc 2259 (_.VT _.RC:$src1), 2260 (_.VT (_.LdFrag addr:$src2)), 2261 cond)))]>, 2262 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>; 2263 let isCommutable = 1 in 2264 def rrik : AVX512AIi8<opc, MRMSrcReg, 2265 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2, 2266 u8imm:$cc), 2267 !strconcat("vpcmp", Suffix, 2268 "\t{$cc, $src2, $src1, $dst {${mask}}|", 2269 "$dst {${mask}}, $src1, $src2, $cc}"), 2270 [(set _.KRC:$dst, (and _.KRCWM:$mask, 2271 (_.KVT (Frag_su:$cc (_.VT _.RC:$src1), 2272 (_.VT _.RC:$src2), 2273 cond))))]>, 2274 EVEX_4V, EVEX_K, Sched<[sched]>; 2275 def rmik : AVX512AIi8<opc, MRMSrcMem, 2276 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2, 2277 u8imm:$cc), 2278 !strconcat("vpcmp", Suffix, 2279 "\t{$cc, $src2, $src1, $dst {${mask}}|", 2280 "$dst {${mask}}, $src1, $src2, $cc}"), 2281 [(set _.KRC:$dst, (and _.KRCWM:$mask, 2282 (_.KVT 2283 (Frag_su:$cc 2284 (_.VT _.RC:$src1), 2285 (_.VT (_.LdFrag addr:$src2)), 2286 cond))))]>, 2287 EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>; 2288 2289 def : Pat<(_.KVT (Frag:$cc (_.LdFrag addr:$src2), 2290 (_.VT _.RC:$src1), cond)), 2291 (!cast<Instruction>(Name#_.ZSuffix#"rmi") 2292 _.RC:$src1, addr:$src2, (X86pcmpm_imm_commute $cc))>; 2293 2294 def : Pat<(and _.KRCWM:$mask, 2295 (_.KVT (Frag_su:$cc (_.LdFrag addr:$src2), 2296 (_.VT _.RC:$src1), cond))), 2297 (!cast<Instruction>(Name#_.ZSuffix#"rmik") 2298 _.KRCWM:$mask, _.RC:$src1, addr:$src2, 2299 (X86pcmpm_imm_commute $cc))>; 2300} 2301 2302multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag, 2303 PatFrag Frag_su, X86FoldableSchedWrite sched, 2304 X86VectorVTInfo _, string Name> : 2305 avx512_icmp_cc<opc, Suffix, Frag, Frag_su, sched, _, Name> { 2306 def rmib : AVX512AIi8<opc, MRMSrcMem, 2307 (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2, 2308 u8imm:$cc), 2309 !strconcat("vpcmp", Suffix, 2310 "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|", 2311 "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"), 2312 [(set _.KRC:$dst, (_.KVT (Frag:$cc 2313 (_.VT _.RC:$src1), 2314 (_.BroadcastLdFrag addr:$src2), 2315 cond)))]>, 2316 EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 2317 def rmibk : AVX512AIi8<opc, MRMSrcMem, 2318 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, 2319 _.ScalarMemOp:$src2, u8imm:$cc), 2320 !strconcat("vpcmp", Suffix, 2321 "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|", 2322 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"), 2323 [(set _.KRC:$dst, (and _.KRCWM:$mask, 2324 (_.KVT (Frag_su:$cc 2325 (_.VT _.RC:$src1), 2326 (_.BroadcastLdFrag addr:$src2), 2327 cond))))]>, 2328 EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 2329 2330 def : Pat<(_.KVT (Frag:$cc (_.BroadcastLdFrag addr:$src2), 2331 (_.VT _.RC:$src1), cond)), 2332 (!cast<Instruction>(Name#_.ZSuffix#"rmib") 2333 _.RC:$src1, addr:$src2, (X86pcmpm_imm_commute $cc))>; 2334 2335 def : Pat<(and _.KRCWM:$mask, 2336 (_.KVT (Frag_su:$cc (_.BroadcastLdFrag addr:$src2), 2337 (_.VT _.RC:$src1), cond))), 2338 (!cast<Instruction>(Name#_.ZSuffix#"rmibk") 2339 _.KRCWM:$mask, _.RC:$src1, addr:$src2, 2340 (X86pcmpm_imm_commute $cc))>; 2341} 2342 2343multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, PatFrag Frag, 2344 PatFrag Frag_su, X86SchedWriteWidths sched, 2345 AVX512VLVectorVTInfo VTInfo, Predicate prd> { 2346 let Predicates = [prd] in 2347 defm Z : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, 2348 sched.ZMM, VTInfo.info512, NAME>, EVEX_V512; 2349 2350 let Predicates = [prd, HasVLX] in { 2351 defm Z256 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, 2352 sched.YMM, VTInfo.info256, NAME>, EVEX_V256; 2353 defm Z128 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, 2354 sched.XMM, VTInfo.info128, NAME>, EVEX_V128; 2355 } 2356} 2357 2358multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, PatFrag Frag, 2359 PatFrag Frag_su, X86SchedWriteWidths sched, 2360 AVX512VLVectorVTInfo VTInfo, Predicate prd> { 2361 let Predicates = [prd] in 2362 defm Z : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, 2363 sched.ZMM, VTInfo.info512, NAME>, EVEX_V512; 2364 2365 let Predicates = [prd, HasVLX] in { 2366 defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, 2367 sched.YMM, VTInfo.info256, NAME>, EVEX_V256; 2368 defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, 2369 sched.XMM, VTInfo.info128, NAME>, EVEX_V128; 2370 } 2371} 2372 2373def X86pcmpm : PatFrag<(ops node:$src1, node:$src2, node:$cc), 2374 (setcc node:$src1, node:$src2, node:$cc), [{ 2375 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); 2376 return !ISD::isUnsignedIntSetCC(CC); 2377}], X86pcmpm_imm>; 2378 2379def X86pcmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc), 2380 (setcc node:$src1, node:$src2, node:$cc), [{ 2381 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); 2382 return N->hasOneUse() && !ISD::isUnsignedIntSetCC(CC); 2383}], X86pcmpm_imm>; 2384 2385def X86pcmpum : PatFrag<(ops node:$src1, node:$src2, node:$cc), 2386 (setcc node:$src1, node:$src2, node:$cc), [{ 2387 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); 2388 return ISD::isUnsignedIntSetCC(CC); 2389}], X86pcmpm_imm>; 2390 2391def X86pcmpum_su : PatFrag<(ops node:$src1, node:$src2, node:$cc), 2392 (setcc node:$src1, node:$src2, node:$cc), [{ 2393 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); 2394 return N->hasOneUse() && ISD::isUnsignedIntSetCC(CC); 2395}], X86pcmpm_imm>; 2396 2397// FIXME: Is there a better scheduler class for VPCMP/VPCMPU? 2398defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86pcmpm, X86pcmpm_su, 2399 SchedWriteVecALU, avx512vl_i8_info, HasBWI>, 2400 EVEX_CD8<8, CD8VF>; 2401defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86pcmpum, X86pcmpum_su, 2402 SchedWriteVecALU, avx512vl_i8_info, HasBWI>, 2403 EVEX_CD8<8, CD8VF>; 2404 2405defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86pcmpm, X86pcmpm_su, 2406 SchedWriteVecALU, avx512vl_i16_info, HasBWI>, 2407 VEX_W, EVEX_CD8<16, CD8VF>; 2408defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86pcmpum, X86pcmpum_su, 2409 SchedWriteVecALU, avx512vl_i16_info, HasBWI>, 2410 VEX_W, EVEX_CD8<16, CD8VF>; 2411 2412defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86pcmpm, X86pcmpm_su, 2413 SchedWriteVecALU, avx512vl_i32_info, 2414 HasAVX512>, EVEX_CD8<32, CD8VF>; 2415defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86pcmpum, X86pcmpum_su, 2416 SchedWriteVecALU, avx512vl_i32_info, 2417 HasAVX512>, EVEX_CD8<32, CD8VF>; 2418 2419defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86pcmpm, X86pcmpm_su, 2420 SchedWriteVecALU, avx512vl_i64_info, 2421 HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>; 2422defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86pcmpum, X86pcmpum_su, 2423 SchedWriteVecALU, avx512vl_i64_info, 2424 HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>; 2425 2426def X86cmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc), 2427 (X86cmpm node:$src1, node:$src2, node:$cc), [{ 2428 return N->hasOneUse(); 2429}]>; 2430 2431def X86cmpm_imm_commute : SDNodeXForm<timm, [{ 2432 uint8_t Imm = X86::getSwappedVCMPImm(N->getZExtValue() & 0x1f); 2433 return getI8Imm(Imm, SDLoc(N)); 2434}]>; 2435 2436multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _, 2437 string Name> { 2438let Uses = [MXCSR], mayRaiseFPException = 1 in { 2439 defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _, 2440 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,u8imm:$cc), 2441 "vcmp"#_.Suffix, 2442 "$cc, $src2, $src1", "$src1, $src2, $cc", 2443 (X86any_cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc), 2444 (X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc), 2445 1>, Sched<[sched]>; 2446 2447 defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _, 2448 (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc), 2449 "vcmp"#_.Suffix, 2450 "$cc, $src2, $src1", "$src1, $src2, $cc", 2451 (X86any_cmpm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), 2452 timm:$cc), 2453 (X86cmpm_su (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), 2454 timm:$cc)>, 2455 Sched<[sched.Folded, sched.ReadAfterFold]>; 2456 2457 defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _, 2458 (outs _.KRC:$dst), 2459 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc), 2460 "vcmp"#_.Suffix, 2461 "$cc, ${src2}"#_.BroadcastStr#", $src1", 2462 "$src1, ${src2}"#_.BroadcastStr#", $cc", 2463 (X86any_cmpm (_.VT _.RC:$src1), 2464 (_.VT (_.BroadcastLdFrag addr:$src2)), 2465 timm:$cc), 2466 (X86cmpm_su (_.VT _.RC:$src1), 2467 (_.VT (_.BroadcastLdFrag addr:$src2)), 2468 timm:$cc)>, 2469 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 2470 } 2471 2472 // Patterns for selecting with loads in other operand. 2473 def : Pat<(X86any_cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1), 2474 timm:$cc), 2475 (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2, 2476 (X86cmpm_imm_commute timm:$cc))>; 2477 2478 def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.LdFrag addr:$src2), 2479 (_.VT _.RC:$src1), 2480 timm:$cc)), 2481 (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask, 2482 _.RC:$src1, addr:$src2, 2483 (X86cmpm_imm_commute timm:$cc))>; 2484 2485 def : Pat<(X86any_cmpm (_.BroadcastLdFrag addr:$src2), 2486 (_.VT _.RC:$src1), timm:$cc), 2487 (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2, 2488 (X86cmpm_imm_commute timm:$cc))>; 2489 2490 def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.BroadcastLdFrag addr:$src2), 2491 (_.VT _.RC:$src1), 2492 timm:$cc)), 2493 (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask, 2494 _.RC:$src1, addr:$src2, 2495 (X86cmpm_imm_commute timm:$cc))>; 2496 2497 // Patterns for mask intrinsics. 2498 def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc, 2499 (_.KVT immAllOnesV)), 2500 (!cast<Instruction>(Name#_.ZSuffix#"rri") _.RC:$src1, _.RC:$src2, timm:$cc)>; 2501 2502 def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc, _.KRCWM:$mask), 2503 (!cast<Instruction>(Name#_.ZSuffix#"rrik") _.KRCWM:$mask, _.RC:$src1, 2504 _.RC:$src2, timm:$cc)>; 2505 2506 def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), timm:$cc, 2507 (_.KVT immAllOnesV)), 2508 (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2, timm:$cc)>; 2509 2510 def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), timm:$cc, 2511 _.KRCWM:$mask), 2512 (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask, _.RC:$src1, 2513 addr:$src2, timm:$cc)>; 2514 2515 def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc, 2516 (_.KVT immAllOnesV)), 2517 (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2, timm:$cc)>; 2518 2519 def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc, 2520 _.KRCWM:$mask), 2521 (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask, _.RC:$src1, 2522 addr:$src2, timm:$cc)>; 2523 2524 // Patterns for mask intrinsics with loads in other operand. 2525 def : Pat<(X86cmpmm (_.VT (_.LdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc, 2526 (_.KVT immAllOnesV)), 2527 (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2, 2528 (X86cmpm_imm_commute timm:$cc))>; 2529 2530 def : Pat<(X86cmpmm (_.VT (_.LdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc, 2531 _.KRCWM:$mask), 2532 (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask, 2533 _.RC:$src1, addr:$src2, 2534 (X86cmpm_imm_commute timm:$cc))>; 2535 2536 def : Pat<(X86cmpmm (_.VT (_.BroadcastLdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc, 2537 (_.KVT immAllOnesV)), 2538 (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2, 2539 (X86cmpm_imm_commute timm:$cc))>; 2540 2541 def : Pat<(X86cmpmm (_.VT (_.BroadcastLdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc, 2542 _.KRCWM:$mask), 2543 (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask, 2544 _.RC:$src1, addr:$src2, 2545 (X86cmpm_imm_commute timm:$cc))>; 2546} 2547 2548multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> { 2549 // comparison code form (VCMP[EQ/LT/LE/...] 2550 let Uses = [MXCSR] in 2551 defm rrib : AVX512_maskable_custom_cmp<0xC2, MRMSrcReg, (outs _.KRC:$dst), 2552 (ins _.RC:$src1, _.RC:$src2, u8imm:$cc), 2553 (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2, u8imm:$cc), 2554 "vcmp"#_.Suffix, 2555 "$cc, {sae}, $src2, $src1", 2556 "$src1, $src2, {sae}, $cc", 2557 [(set _.KRC:$dst, (X86cmpmmSAE (_.VT _.RC:$src1), 2558 (_.VT _.RC:$src2), timm:$cc, (_.KVT immAllOnesV)))], 2559 [(set _.KRC:$dst, (X86cmpmmSAE (_.VT _.RC:$src1), 2560 (_.VT _.RC:$src2), timm:$cc, _.KRCWM:$mask))]>, 2561 EVEX_B, Sched<[sched]>; 2562} 2563 2564multiclass avx512_vcmp<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> { 2565 let Predicates = [HasAVX512] in { 2566 defm Z : avx512_vcmp_common<sched.ZMM, _.info512, NAME>, 2567 avx512_vcmp_sae<sched.ZMM, _.info512>, EVEX_V512; 2568 2569 } 2570 let Predicates = [HasAVX512,HasVLX] in { 2571 defm Z128 : avx512_vcmp_common<sched.XMM, _.info128, NAME>, EVEX_V128; 2572 defm Z256 : avx512_vcmp_common<sched.YMM, _.info256, NAME>, EVEX_V256; 2573 } 2574} 2575 2576defm VCMPPD : avx512_vcmp<SchedWriteFCmp, avx512vl_f64_info>, 2577 AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; 2578defm VCMPPS : avx512_vcmp<SchedWriteFCmp, avx512vl_f32_info>, 2579 AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; 2580 2581// Patterns to select fp compares with load as first operand. 2582let Predicates = [HasAVX512] in { 2583 def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1, 2584 timm:$cc)), 2585 (VCMPSDZrm FR64X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>; 2586 2587 def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1, 2588 timm:$cc)), 2589 (VCMPSSZrm FR32X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>; 2590} 2591 2592// ---------------------------------------------------------------- 2593// FPClass 2594 2595def X86Vfpclasss_su : PatFrag<(ops node:$src1, node:$src2), 2596 (X86Vfpclasss node:$src1, node:$src2), [{ 2597 return N->hasOneUse(); 2598}]>; 2599 2600def X86Vfpclass_su : PatFrag<(ops node:$src1, node:$src2), 2601 (X86Vfpclass node:$src1, node:$src2), [{ 2602 return N->hasOneUse(); 2603}]>; 2604 2605//handle fpclass instruction mask = op(reg_scalar,imm) 2606// op(mem_scalar,imm) 2607multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr, 2608 X86FoldableSchedWrite sched, X86VectorVTInfo _, 2609 Predicate prd> { 2610 let Predicates = [prd], ExeDomain = _.ExeDomain, Uses = [MXCSR] in { 2611 def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst), 2612 (ins _.RC:$src1, i32u8imm:$src2), 2613 OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", 2614 [(set _.KRC:$dst,(X86Vfpclasss (_.VT _.RC:$src1), 2615 (i32 timm:$src2)))]>, 2616 Sched<[sched]>; 2617 def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst), 2618 (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2), 2619 OpcodeStr#_.Suffix# 2620 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", 2621 [(set _.KRC:$dst,(and _.KRCWM:$mask, 2622 (X86Vfpclasss_su (_.VT _.RC:$src1), 2623 (i32 timm:$src2))))]>, 2624 EVEX_K, Sched<[sched]>; 2625 def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst), 2626 (ins _.IntScalarMemOp:$src1, i32u8imm:$src2), 2627 OpcodeStr#_.Suffix# 2628 "\t{$src2, $src1, $dst|$dst, $src1, $src2}", 2629 [(set _.KRC:$dst, 2630 (X86Vfpclasss (_.ScalarIntMemFrags addr:$src1), 2631 (i32 timm:$src2)))]>, 2632 Sched<[sched.Folded, sched.ReadAfterFold]>; 2633 def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst), 2634 (ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2), 2635 OpcodeStr#_.Suffix# 2636 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", 2637 [(set _.KRC:$dst,(and _.KRCWM:$mask, 2638 (X86Vfpclasss_su (_.ScalarIntMemFrags addr:$src1), 2639 (i32 timm:$src2))))]>, 2640 EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>; 2641 } 2642} 2643 2644//handle fpclass instruction mask = fpclass(reg_vec, reg_vec, imm) 2645// fpclass(reg_vec, mem_vec, imm) 2646// fpclass(reg_vec, broadcast(eltVt), imm) 2647multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, 2648 X86FoldableSchedWrite sched, X86VectorVTInfo _, 2649 string mem>{ 2650 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in { 2651 def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst), 2652 (ins _.RC:$src1, i32u8imm:$src2), 2653 OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", 2654 [(set _.KRC:$dst,(X86Vfpclass (_.VT _.RC:$src1), 2655 (i32 timm:$src2)))]>, 2656 Sched<[sched]>; 2657 def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst), 2658 (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2), 2659 OpcodeStr#_.Suffix# 2660 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", 2661 [(set _.KRC:$dst,(and _.KRCWM:$mask, 2662 (X86Vfpclass_su (_.VT _.RC:$src1), 2663 (i32 timm:$src2))))]>, 2664 EVEX_K, Sched<[sched]>; 2665 def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst), 2666 (ins _.MemOp:$src1, i32u8imm:$src2), 2667 OpcodeStr#_.Suffix#"{"#mem#"}"# 2668 "\t{$src2, $src1, $dst|$dst, $src1, $src2}", 2669 [(set _.KRC:$dst,(X86Vfpclass 2670 (_.VT (_.LdFrag addr:$src1)), 2671 (i32 timm:$src2)))]>, 2672 Sched<[sched.Folded, sched.ReadAfterFold]>; 2673 def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst), 2674 (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2), 2675 OpcodeStr#_.Suffix#"{"#mem#"}"# 2676 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", 2677 [(set _.KRC:$dst, (and _.KRCWM:$mask, (X86Vfpclass_su 2678 (_.VT (_.LdFrag addr:$src1)), 2679 (i32 timm:$src2))))]>, 2680 EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>; 2681 def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst), 2682 (ins _.ScalarMemOp:$src1, i32u8imm:$src2), 2683 OpcodeStr#_.Suffix#"\t{$src2, ${src1}"# 2684 _.BroadcastStr#", $dst|$dst, ${src1}" 2685 #_.BroadcastStr#", $src2}", 2686 [(set _.KRC:$dst,(X86Vfpclass 2687 (_.VT (_.BroadcastLdFrag addr:$src1)), 2688 (i32 timm:$src2)))]>, 2689 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 2690 def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst), 2691 (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2), 2692 OpcodeStr#_.Suffix#"\t{$src2, ${src1}"# 2693 _.BroadcastStr#", $dst {${mask}}|$dst {${mask}}, ${src1}"# 2694 _.BroadcastStr#", $src2}", 2695 [(set _.KRC:$dst,(and _.KRCWM:$mask, (X86Vfpclass_su 2696 (_.VT (_.BroadcastLdFrag addr:$src1)), 2697 (i32 timm:$src2))))]>, 2698 EVEX_B, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>; 2699 } 2700 2701 // Allow registers or broadcast with the x, y, z suffix we use to disambiguate 2702 // the memory form. 2703 def : InstAlias<OpcodeStr#_.Suffix#mem# 2704 "\t{$src2, $src1, $dst|$dst, $src1, $src2}", 2705 (!cast<Instruction>(NAME#"rr") 2706 _.KRC:$dst, _.RC:$src1, i32u8imm:$src2), 0, "att">; 2707 def : InstAlias<OpcodeStr#_.Suffix#mem# 2708 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", 2709 (!cast<Instruction>(NAME#"rrk") 2710 _.KRC:$dst, _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2), 0, "att">; 2711 def : InstAlias<OpcodeStr#_.Suffix#mem# 2712 "\t{$src2, ${src1}"#_.BroadcastStr#", $dst|$dst, ${src1}"# 2713 _.BroadcastStr#", $src2}", 2714 (!cast<Instruction>(NAME#"rmb") 2715 _.KRC:$dst, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">; 2716 def : InstAlias<OpcodeStr#_.Suffix#mem# 2717 "\t{$src2, ${src1}"#_.BroadcastStr#", $dst {${mask}}|" 2718 "$dst {${mask}}, ${src1}"#_.BroadcastStr#", $src2}", 2719 (!cast<Instruction>(NAME#"rmbk") 2720 _.KRC:$dst, _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">; 2721} 2722 2723multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _, 2724 bits<8> opc, X86SchedWriteWidths sched, 2725 Predicate prd>{ 2726 let Predicates = [prd] in { 2727 defm Z : avx512_vector_fpclass<opc, OpcodeStr, sched.ZMM, 2728 _.info512, "z">, EVEX_V512; 2729 } 2730 let Predicates = [prd, HasVLX] in { 2731 defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, sched.XMM, 2732 _.info128, "x">, EVEX_V128; 2733 defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, sched.YMM, 2734 _.info256, "y">, EVEX_V256; 2735 } 2736} 2737 2738multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec, 2739 bits<8> opcScalar, X86SchedWriteWidths sched, 2740 Predicate prd> { 2741 defm PS : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f32_info, opcVec, 2742 sched, prd>, 2743 EVEX_CD8<32, CD8VF>; 2744 defm PD : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f64_info, opcVec, 2745 sched, prd>, 2746 EVEX_CD8<64, CD8VF> , VEX_W; 2747 defm SSZ : avx512_scalar_fpclass<opcScalar, OpcodeStr, 2748 sched.Scl, f32x_info, prd>, VEX_LIG, 2749 EVEX_CD8<32, CD8VT1>; 2750 defm SDZ : avx512_scalar_fpclass<opcScalar, OpcodeStr, 2751 sched.Scl, f64x_info, prd>, VEX_LIG, 2752 EVEX_CD8<64, CD8VT1>, VEX_W; 2753} 2754 2755defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, SchedWriteFCmp, 2756 HasDQI>, AVX512AIi8Base, EVEX; 2757 2758//----------------------------------------------------------------- 2759// Mask register copy, including 2760// - copy between mask registers 2761// - load/store mask registers 2762// - copy from GPR to mask register and vice versa 2763// 2764multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk, 2765 string OpcodeStr, RegisterClass KRC, 2766 ValueType vvt, X86MemOperand x86memop> { 2767 let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove] in 2768 def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src), 2769 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>, 2770 Sched<[WriteMove]>; 2771 def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src), 2772 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 2773 [(set KRC:$dst, (vvt (load addr:$src)))]>, 2774 Sched<[WriteLoad]>; 2775 def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src), 2776 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 2777 [(store KRC:$src, addr:$dst)]>, 2778 Sched<[WriteStore]>; 2779} 2780 2781multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk, 2782 string OpcodeStr, 2783 RegisterClass KRC, RegisterClass GRC> { 2784 let hasSideEffects = 0 in { 2785 def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src), 2786 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>, 2787 Sched<[WriteMove]>; 2788 def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src), 2789 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>, 2790 Sched<[WriteMove]>; 2791 } 2792} 2793 2794let Predicates = [HasDQI] in 2795 defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>, 2796 avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>, 2797 VEX, PD; 2798 2799let Predicates = [HasAVX512] in 2800 defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>, 2801 avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>, 2802 VEX, PS; 2803 2804let Predicates = [HasBWI] in { 2805 defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem>, 2806 VEX, PD, VEX_W; 2807 defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>, 2808 VEX, XD; 2809 defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>, 2810 VEX, PS, VEX_W; 2811 defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>, 2812 VEX, XD, VEX_W; 2813} 2814 2815// GR from/to mask register 2816def : Pat<(v16i1 (bitconvert (i16 GR16:$src))), 2817 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>; 2818def : Pat<(i16 (bitconvert (v16i1 VK16:$src))), 2819 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>; 2820def : Pat<(i8 (trunc (i16 (bitconvert (v16i1 VK16:$src))))), 2821 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_8bit)>; 2822 2823def : Pat<(v8i1 (bitconvert (i8 GR8:$src))), 2824 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>; 2825def : Pat<(i8 (bitconvert (v8i1 VK8:$src))), 2826 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit)>; 2827 2828def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))), 2829 (KMOVWrk VK16:$src)>; 2830def : Pat<(i64 (zext (i16 (bitconvert (v16i1 VK16:$src))))), 2831 (SUBREG_TO_REG (i64 0), (KMOVWrk VK16:$src), sub_32bit)>; 2832def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))), 2833 (COPY_TO_REGCLASS VK16:$src, GR32)>; 2834def : Pat<(i64 (anyext (i16 (bitconvert (v16i1 VK16:$src))))), 2835 (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK16:$src, GR32), sub_32bit)>; 2836 2837def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))), 2838 (KMOVBrk VK8:$src)>, Requires<[HasDQI]>; 2839def : Pat<(i64 (zext (i8 (bitconvert (v8i1 VK8:$src))))), 2840 (SUBREG_TO_REG (i64 0), (KMOVBrk VK8:$src), sub_32bit)>, Requires<[HasDQI]>; 2841def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))), 2842 (COPY_TO_REGCLASS VK8:$src, GR32)>; 2843def : Pat<(i64 (anyext (i8 (bitconvert (v8i1 VK8:$src))))), 2844 (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK8:$src, GR32), sub_32bit)>; 2845 2846def : Pat<(v32i1 (bitconvert (i32 GR32:$src))), 2847 (COPY_TO_REGCLASS GR32:$src, VK32)>; 2848def : Pat<(i32 (bitconvert (v32i1 VK32:$src))), 2849 (COPY_TO_REGCLASS VK32:$src, GR32)>; 2850def : Pat<(v64i1 (bitconvert (i64 GR64:$src))), 2851 (COPY_TO_REGCLASS GR64:$src, VK64)>; 2852def : Pat<(i64 (bitconvert (v64i1 VK64:$src))), 2853 (COPY_TO_REGCLASS VK64:$src, GR64)>; 2854 2855// Load/store kreg 2856let Predicates = [HasDQI] in { 2857 def : Pat<(v1i1 (load addr:$src)), 2858 (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>; 2859 def : Pat<(v2i1 (load addr:$src)), 2860 (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>; 2861 def : Pat<(v4i1 (load addr:$src)), 2862 (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>; 2863} 2864 2865let Predicates = [HasAVX512] in { 2866 def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))), 2867 (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>; 2868 def : Pat<(v16i1 (bitconvert (loadi16 addr:$src))), 2869 (KMOVWkm addr:$src)>; 2870} 2871 2872def X86kextract : SDNode<"ISD::EXTRACT_VECTOR_ELT", 2873 SDTypeProfile<1, 2, [SDTCisVT<0, i8>, 2874 SDTCVecEltisVT<1, i1>, 2875 SDTCisPtrTy<2>]>>; 2876 2877let Predicates = [HasAVX512] in { 2878 multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC, ValueType maskVT> { 2879 def : Pat<(maskVT (scalar_to_vector GR32:$src)), 2880 (COPY_TO_REGCLASS GR32:$src, maskRC)>; 2881 2882 def : Pat<(maskVT (scalar_to_vector GR8:$src)), 2883 (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>; 2884 2885 def : Pat<(i8 (X86kextract maskRC:$src, (iPTR 0))), 2886 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS maskRC:$src, GR32)), sub_8bit)>; 2887 2888 def : Pat<(i32 (anyext (i8 (X86kextract maskRC:$src, (iPTR 0))))), 2889 (i32 (COPY_TO_REGCLASS maskRC:$src, GR32))>; 2890 } 2891 2892 defm : operation_gpr_mask_copy_lowering<VK1, v1i1>; 2893 defm : operation_gpr_mask_copy_lowering<VK2, v2i1>; 2894 defm : operation_gpr_mask_copy_lowering<VK4, v4i1>; 2895 defm : operation_gpr_mask_copy_lowering<VK8, v8i1>; 2896 defm : operation_gpr_mask_copy_lowering<VK16, v16i1>; 2897 defm : operation_gpr_mask_copy_lowering<VK32, v32i1>; 2898 defm : operation_gpr_mask_copy_lowering<VK64, v64i1>; 2899 2900 def : Pat<(insert_subvector (v16i1 immAllZerosV), 2901 (v1i1 (scalar_to_vector GR8:$src)), (iPTR 0)), 2902 (KMOVWkr (AND32ri8 2903 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), 2904 (i32 1)))>; 2905} 2906 2907// Mask unary operation 2908// - KNOT 2909multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr, 2910 RegisterClass KRC, SDPatternOperator OpNode, 2911 X86FoldableSchedWrite sched, Predicate prd> { 2912 let Predicates = [prd] in 2913 def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src), 2914 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 2915 [(set KRC:$dst, (OpNode KRC:$src))]>, 2916 Sched<[sched]>; 2917} 2918 2919multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr, 2920 SDPatternOperator OpNode, 2921 X86FoldableSchedWrite sched> { 2922 defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode, 2923 sched, HasDQI>, VEX, PD; 2924 defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode, 2925 sched, HasAVX512>, VEX, PS; 2926 defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode, 2927 sched, HasBWI>, VEX, PD, VEX_W; 2928 defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode, 2929 sched, HasBWI>, VEX, PS, VEX_W; 2930} 2931 2932// TODO - do we need a X86SchedWriteWidths::KMASK type? 2933defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SchedWriteVecLogic.XMM>; 2934 2935// KNL does not support KMOVB, 8-bit mask is promoted to 16-bit 2936let Predicates = [HasAVX512, NoDQI] in 2937def : Pat<(vnot VK8:$src), 2938 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>; 2939 2940def : Pat<(vnot VK4:$src), 2941 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>; 2942def : Pat<(vnot VK2:$src), 2943 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>; 2944def : Pat<(vnot VK1:$src), 2945 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK1:$src, VK16)), VK2)>; 2946 2947// Mask binary operation 2948// - KAND, KANDN, KOR, KXNOR, KXOR 2949multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr, 2950 RegisterClass KRC, SDPatternOperator OpNode, 2951 X86FoldableSchedWrite sched, Predicate prd, 2952 bit IsCommutable> { 2953 let Predicates = [prd], isCommutable = IsCommutable in 2954 def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2), 2955 !strconcat(OpcodeStr, 2956 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 2957 [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>, 2958 Sched<[sched]>; 2959} 2960 2961multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr, 2962 SDPatternOperator OpNode, 2963 X86FoldableSchedWrite sched, bit IsCommutable, 2964 Predicate prdW = HasAVX512> { 2965 defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode, 2966 sched, HasDQI, IsCommutable>, VEX_4V, VEX_L, PD; 2967 defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode, 2968 sched, prdW, IsCommutable>, VEX_4V, VEX_L, PS; 2969 defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode, 2970 sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PD; 2971 defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode, 2972 sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PS; 2973} 2974 2975// These nodes use 'vnot' instead of 'not' to support vectors. 2976def vandn : PatFrag<(ops node:$i0, node:$i1), (and (vnot node:$i0), node:$i1)>; 2977def vxnor : PatFrag<(ops node:$i0, node:$i1), (vnot (xor node:$i0, node:$i1))>; 2978 2979// TODO - do we need a X86SchedWriteWidths::KMASK type? 2980defm KAND : avx512_mask_binop_all<0x41, "kand", and, SchedWriteVecLogic.XMM, 1>; 2981defm KOR : avx512_mask_binop_all<0x45, "kor", or, SchedWriteVecLogic.XMM, 1>; 2982defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor, SchedWriteVecLogic.XMM, 1>; 2983defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor, SchedWriteVecLogic.XMM, 1>; 2984defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn, SchedWriteVecLogic.XMM, 0>; 2985defm KADD : avx512_mask_binop_all<0x4A, "kadd", X86kadd, SchedWriteVecLogic.XMM, 1, HasDQI>; 2986 2987multiclass avx512_binop_pat<SDPatternOperator VOpNode, 2988 Instruction Inst> { 2989 // With AVX512F, 8-bit mask is promoted to 16-bit mask, 2990 // for the DQI set, this type is legal and KxxxB instruction is used 2991 let Predicates = [NoDQI] in 2992 def : Pat<(VOpNode VK8:$src1, VK8:$src2), 2993 (COPY_TO_REGCLASS 2994 (Inst (COPY_TO_REGCLASS VK8:$src1, VK16), 2995 (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>; 2996 2997 // All types smaller than 8 bits require conversion anyway 2998 def : Pat<(VOpNode VK1:$src1, VK1:$src2), 2999 (COPY_TO_REGCLASS (Inst 3000 (COPY_TO_REGCLASS VK1:$src1, VK16), 3001 (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>; 3002 def : Pat<(VOpNode VK2:$src1, VK2:$src2), 3003 (COPY_TO_REGCLASS (Inst 3004 (COPY_TO_REGCLASS VK2:$src1, VK16), 3005 (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>; 3006 def : Pat<(VOpNode VK4:$src1, VK4:$src2), 3007 (COPY_TO_REGCLASS (Inst 3008 (COPY_TO_REGCLASS VK4:$src1, VK16), 3009 (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>; 3010} 3011 3012defm : avx512_binop_pat<and, KANDWrr>; 3013defm : avx512_binop_pat<vandn, KANDNWrr>; 3014defm : avx512_binop_pat<or, KORWrr>; 3015defm : avx512_binop_pat<vxnor, KXNORWrr>; 3016defm : avx512_binop_pat<xor, KXORWrr>; 3017 3018// Mask unpacking 3019multiclass avx512_mask_unpck<string Suffix, X86KVectorVTInfo Dst, 3020 X86KVectorVTInfo Src, X86FoldableSchedWrite sched, 3021 Predicate prd> { 3022 let Predicates = [prd] in { 3023 let hasSideEffects = 0 in 3024 def rr : I<0x4b, MRMSrcReg, (outs Dst.KRC:$dst), 3025 (ins Src.KRC:$src1, Src.KRC:$src2), 3026 "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, 3027 VEX_4V, VEX_L, Sched<[sched]>; 3028 3029 def : Pat<(Dst.KVT (concat_vectors Src.KRC:$src1, Src.KRC:$src2)), 3030 (!cast<Instruction>(NAME#rr) Src.KRC:$src2, Src.KRC:$src1)>; 3031 } 3032} 3033 3034defm KUNPCKBW : avx512_mask_unpck<"bw", v16i1_info, v8i1_info, WriteShuffle, HasAVX512>, PD; 3035defm KUNPCKWD : avx512_mask_unpck<"wd", v32i1_info, v16i1_info, WriteShuffle, HasBWI>, PS; 3036defm KUNPCKDQ : avx512_mask_unpck<"dq", v64i1_info, v32i1_info, WriteShuffle, HasBWI>, PS, VEX_W; 3037 3038// Mask bit testing 3039multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC, 3040 SDNode OpNode, X86FoldableSchedWrite sched, 3041 Predicate prd> { 3042 let Predicates = [prd], Defs = [EFLAGS] in 3043 def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2), 3044 !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), 3045 [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>, 3046 Sched<[sched]>; 3047} 3048 3049multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode, 3050 X86FoldableSchedWrite sched, 3051 Predicate prdW = HasAVX512> { 3052 defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, sched, HasDQI>, 3053 VEX, PD; 3054 defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, sched, prdW>, 3055 VEX, PS; 3056 defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, sched, HasBWI>, 3057 VEX, PS, VEX_W; 3058 defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, sched, HasBWI>, 3059 VEX, PD, VEX_W; 3060} 3061 3062// TODO - do we need a X86SchedWriteWidths::KMASK type? 3063defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SchedWriteVecLogic.XMM>; 3064defm KTEST : avx512_mask_testop_w<0x99, "ktest", X86ktest, SchedWriteVecLogic.XMM, HasDQI>; 3065 3066// Mask shift 3067multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC, 3068 SDNode OpNode, X86FoldableSchedWrite sched> { 3069 let Predicates = [HasAVX512] in 3070 def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm), 3071 !strconcat(OpcodeStr, 3072 "\t{$imm, $src, $dst|$dst, $src, $imm}"), 3073 [(set KRC:$dst, (OpNode KRC:$src, (i8 timm:$imm)))]>, 3074 Sched<[sched]>; 3075} 3076 3077multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr, 3078 SDNode OpNode, X86FoldableSchedWrite sched> { 3079 defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode, 3080 sched>, VEX, TAPD, VEX_W; 3081 let Predicates = [HasDQI] in 3082 defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode, 3083 sched>, VEX, TAPD; 3084 let Predicates = [HasBWI] in { 3085 defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode, 3086 sched>, VEX, TAPD, VEX_W; 3087 defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode, 3088 sched>, VEX, TAPD; 3089 } 3090} 3091 3092defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, WriteShuffle>; 3093defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, WriteShuffle>; 3094 3095// Patterns for comparing 128/256-bit integer vectors using 512-bit instruction. 3096multiclass axv512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su, 3097 string InstStr, 3098 X86VectorVTInfo Narrow, 3099 X86VectorVTInfo Wide> { 3100def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1), 3101 (Narrow.VT Narrow.RC:$src2), cond)), 3102 (COPY_TO_REGCLASS 3103 (!cast<Instruction>(InstStr#"Zrri") 3104 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), 3105 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)), 3106 (X86pcmpm_imm $cc)), Narrow.KRC)>; 3107 3108def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, 3109 (Narrow.KVT (Frag_su:$cc (Narrow.VT Narrow.RC:$src1), 3110 (Narrow.VT Narrow.RC:$src2), 3111 cond)))), 3112 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik") 3113 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), 3114 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), 3115 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)), 3116 (X86pcmpm_imm $cc)), Narrow.KRC)>; 3117} 3118 3119multiclass axv512_icmp_packed_cc_rmb_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su, 3120 string InstStr, 3121 X86VectorVTInfo Narrow, 3122 X86VectorVTInfo Wide> { 3123// Broadcast load. 3124def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1), 3125 (Narrow.BroadcastLdFrag addr:$src2), cond)), 3126 (COPY_TO_REGCLASS 3127 (!cast<Instruction>(InstStr#"Zrmib") 3128 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), 3129 addr:$src2, (X86pcmpm_imm $cc)), Narrow.KRC)>; 3130 3131def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, 3132 (Narrow.KVT 3133 (Frag_su:$cc (Narrow.VT Narrow.RC:$src1), 3134 (Narrow.BroadcastLdFrag addr:$src2), 3135 cond)))), 3136 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk") 3137 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), 3138 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), 3139 addr:$src2, (X86pcmpm_imm $cc)), Narrow.KRC)>; 3140 3141// Commuted with broadcast load. 3142def : Pat<(Narrow.KVT (Frag:$cc (Narrow.BroadcastLdFrag addr:$src2), 3143 (Narrow.VT Narrow.RC:$src1), 3144 cond)), 3145 (COPY_TO_REGCLASS 3146 (!cast<Instruction>(InstStr#"Zrmib") 3147 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), 3148 addr:$src2, (X86pcmpm_imm_commute $cc)), Narrow.KRC)>; 3149 3150def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, 3151 (Narrow.KVT 3152 (Frag_su:$cc (Narrow.BroadcastLdFrag addr:$src2), 3153 (Narrow.VT Narrow.RC:$src1), 3154 cond)))), 3155 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk") 3156 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), 3157 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), 3158 addr:$src2, (X86pcmpm_imm_commute $cc)), Narrow.KRC)>; 3159} 3160 3161// Same as above, but for fp types which don't use PatFrags. 3162multiclass axv512_cmp_packed_cc_no_vlx_lowering<string InstStr, 3163 X86VectorVTInfo Narrow, 3164 X86VectorVTInfo Wide> { 3165def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1), 3166 (Narrow.VT Narrow.RC:$src2), timm:$cc)), 3167 (COPY_TO_REGCLASS 3168 (!cast<Instruction>(InstStr#"Zrri") 3169 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), 3170 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)), 3171 timm:$cc), Narrow.KRC)>; 3172 3173def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, 3174 (X86cmpm_su (Narrow.VT Narrow.RC:$src1), 3175 (Narrow.VT Narrow.RC:$src2), timm:$cc))), 3176 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik") 3177 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), 3178 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), 3179 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)), 3180 timm:$cc), Narrow.KRC)>; 3181 3182// Broadcast load. 3183def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1), 3184 (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc)), 3185 (COPY_TO_REGCLASS 3186 (!cast<Instruction>(InstStr#"Zrmbi") 3187 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), 3188 addr:$src2, timm:$cc), Narrow.KRC)>; 3189 3190def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, 3191 (X86cmpm_su (Narrow.VT Narrow.RC:$src1), 3192 (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc))), 3193 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik") 3194 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), 3195 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), 3196 addr:$src2, timm:$cc), Narrow.KRC)>; 3197 3198// Commuted with broadcast load. 3199def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), 3200 (Narrow.VT Narrow.RC:$src1), timm:$cc)), 3201 (COPY_TO_REGCLASS 3202 (!cast<Instruction>(InstStr#"Zrmbi") 3203 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), 3204 addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>; 3205 3206def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, 3207 (X86cmpm_su (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), 3208 (Narrow.VT Narrow.RC:$src1), timm:$cc))), 3209 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik") 3210 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), 3211 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), 3212 addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>; 3213} 3214 3215let Predicates = [HasAVX512, NoVLX] in { 3216 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>; 3217 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>; 3218 3219 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>; 3220 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>; 3221 3222 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>; 3223 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>; 3224 3225 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>; 3226 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>; 3227 3228 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>; 3229 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>; 3230 3231 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>; 3232 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>; 3233 3234 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>; 3235 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>; 3236 3237 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>; 3238 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>; 3239 3240 defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v8f32x_info, v16f32_info>; 3241 defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v4f32x_info, v16f32_info>; 3242 defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v4f64x_info, v8f64_info>; 3243 defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v2f64x_info, v8f64_info>; 3244} 3245 3246let Predicates = [HasBWI, NoVLX] in { 3247 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v32i8x_info, v64i8_info>; 3248 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v32i8x_info, v64i8_info>; 3249 3250 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v16i8x_info, v64i8_info>; 3251 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v16i8x_info, v64i8_info>; 3252 3253 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v16i16x_info, v32i16_info>; 3254 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v16i16x_info, v32i16_info>; 3255 3256 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v8i16x_info, v32i16_info>; 3257 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v8i16x_info, v32i16_info>; 3258} 3259 3260// Mask setting all 0s or 1s 3261multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, SDPatternOperator Val> { 3262 let Predicates = [HasAVX512] in 3263 let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1, 3264 SchedRW = [WriteZero] in 3265 def NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "", 3266 [(set KRC:$dst, (VT Val))]>; 3267} 3268 3269multiclass avx512_mask_setop_w<SDPatternOperator Val> { 3270 defm W : avx512_mask_setop<VK16, v16i1, Val>; 3271 defm D : avx512_mask_setop<VK32, v32i1, Val>; 3272 defm Q : avx512_mask_setop<VK64, v64i1, Val>; 3273} 3274 3275defm KSET0 : avx512_mask_setop_w<immAllZerosV>; 3276defm KSET1 : avx512_mask_setop_w<immAllOnesV>; 3277 3278// With AVX-512 only, 8-bit mask is promoted to 16-bit mask. 3279let Predicates = [HasAVX512] in { 3280 def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>; 3281 def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>; 3282 def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>; 3283 def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>; 3284 def : Pat<(v8i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK8)>; 3285 def : Pat<(v4i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK4)>; 3286 def : Pat<(v2i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK2)>; 3287 def : Pat<(v1i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK1)>; 3288} 3289 3290// Patterns for kmask insert_subvector/extract_subvector to/from index=0 3291multiclass operation_subvector_mask_lowering<RegisterClass subRC, ValueType subVT, 3292 RegisterClass RC, ValueType VT> { 3293 def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))), 3294 (subVT (COPY_TO_REGCLASS RC:$src, subRC))>; 3295 3296 def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))), 3297 (VT (COPY_TO_REGCLASS subRC:$src, RC))>; 3298} 3299defm : operation_subvector_mask_lowering<VK1, v1i1, VK2, v2i1>; 3300defm : operation_subvector_mask_lowering<VK1, v1i1, VK4, v4i1>; 3301defm : operation_subvector_mask_lowering<VK1, v1i1, VK8, v8i1>; 3302defm : operation_subvector_mask_lowering<VK1, v1i1, VK16, v16i1>; 3303defm : operation_subvector_mask_lowering<VK1, v1i1, VK32, v32i1>; 3304defm : operation_subvector_mask_lowering<VK1, v1i1, VK64, v64i1>; 3305 3306defm : operation_subvector_mask_lowering<VK2, v2i1, VK4, v4i1>; 3307defm : operation_subvector_mask_lowering<VK2, v2i1, VK8, v8i1>; 3308defm : operation_subvector_mask_lowering<VK2, v2i1, VK16, v16i1>; 3309defm : operation_subvector_mask_lowering<VK2, v2i1, VK32, v32i1>; 3310defm : operation_subvector_mask_lowering<VK2, v2i1, VK64, v64i1>; 3311 3312defm : operation_subvector_mask_lowering<VK4, v4i1, VK8, v8i1>; 3313defm : operation_subvector_mask_lowering<VK4, v4i1, VK16, v16i1>; 3314defm : operation_subvector_mask_lowering<VK4, v4i1, VK32, v32i1>; 3315defm : operation_subvector_mask_lowering<VK4, v4i1, VK64, v64i1>; 3316 3317defm : operation_subvector_mask_lowering<VK8, v8i1, VK16, v16i1>; 3318defm : operation_subvector_mask_lowering<VK8, v8i1, VK32, v32i1>; 3319defm : operation_subvector_mask_lowering<VK8, v8i1, VK64, v64i1>; 3320 3321defm : operation_subvector_mask_lowering<VK16, v16i1, VK32, v32i1>; 3322defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>; 3323 3324defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>; 3325 3326//===----------------------------------------------------------------------===// 3327// AVX-512 - Aligned and unaligned load and store 3328// 3329 3330multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name, 3331 X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload, 3332 X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd, 3333 bit NoRMPattern = 0, 3334 SDPatternOperator SelectOprr = vselect> { 3335 let hasSideEffects = 0 in { 3336 let isMoveReg = 1 in 3337 def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src), 3338 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [], 3339 _.ExeDomain>, EVEX, Sched<[Sched.RR]>, 3340 EVEX2VEXOverride<EVEX2VEXOvrd#"rr">; 3341 def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), 3342 (ins _.KRCWM:$mask, _.RC:$src), 3343 !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|", 3344 "${dst} {${mask}} {z}, $src}"), 3345 [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask, 3346 (_.VT _.RC:$src), 3347 _.ImmAllZerosV)))], _.ExeDomain>, 3348 EVEX, EVEX_KZ, Sched<[Sched.RR]>; 3349 3350 let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1 in 3351 def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src), 3352 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 3353 !if(NoRMPattern, [], 3354 [(set _.RC:$dst, 3355 (_.VT (ld_frag addr:$src)))]), 3356 _.ExeDomain>, EVEX, Sched<[Sched.RM]>, 3357 EVEX2VEXOverride<EVEX2VEXOvrd#"rm">; 3358 3359 let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in { 3360 def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), 3361 (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1), 3362 !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|", 3363 "${dst} {${mask}}, $src1}"), 3364 [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask, 3365 (_.VT _.RC:$src1), 3366 (_.VT _.RC:$src0))))], _.ExeDomain>, 3367 EVEX, EVEX_K, Sched<[Sched.RR]>; 3368 def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), 3369 (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1), 3370 !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|", 3371 "${dst} {${mask}}, $src1}"), 3372 [(set _.RC:$dst, (_.VT 3373 (vselect_mask _.KRCWM:$mask, 3374 (_.VT (ld_frag addr:$src1)), 3375 (_.VT _.RC:$src0))))], _.ExeDomain>, 3376 EVEX, EVEX_K, Sched<[Sched.RM]>; 3377 } 3378 def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), 3379 (ins _.KRCWM:$mask, _.MemOp:$src), 3380 OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"# 3381 "${dst} {${mask}} {z}, $src}", 3382 [(set _.RC:$dst, (_.VT (vselect_mask _.KRCWM:$mask, 3383 (_.VT (ld_frag addr:$src)), _.ImmAllZerosV)))], 3384 _.ExeDomain>, EVEX, EVEX_KZ, Sched<[Sched.RM]>; 3385 } 3386 def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)), 3387 (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>; 3388 3389 def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)), 3390 (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>; 3391 3392 def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))), 3393 (!cast<Instruction>(Name#_.ZSuffix#rmk) _.RC:$src0, 3394 _.KRCWM:$mask, addr:$ptr)>; 3395} 3396 3397multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr, 3398 AVX512VLVectorVTInfo _, Predicate prd, 3399 X86SchedWriteMoveLSWidths Sched, 3400 string EVEX2VEXOvrd, bit NoRMPattern = 0> { 3401 let Predicates = [prd] in 3402 defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512, 3403 _.info512.AlignedLdFrag, masked_load_aligned, 3404 Sched.ZMM, "", NoRMPattern>, EVEX_V512; 3405 3406 let Predicates = [prd, HasVLX] in { 3407 defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256, 3408 _.info256.AlignedLdFrag, masked_load_aligned, 3409 Sched.YMM, EVEX2VEXOvrd#"Y", NoRMPattern>, EVEX_V256; 3410 defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128, 3411 _.info128.AlignedLdFrag, masked_load_aligned, 3412 Sched.XMM, EVEX2VEXOvrd, NoRMPattern>, EVEX_V128; 3413 } 3414} 3415 3416multiclass avx512_load_vl<bits<8> opc, string OpcodeStr, 3417 AVX512VLVectorVTInfo _, Predicate prd, 3418 X86SchedWriteMoveLSWidths Sched, 3419 string EVEX2VEXOvrd, bit NoRMPattern = 0, 3420 SDPatternOperator SelectOprr = vselect> { 3421 let Predicates = [prd] in 3422 defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512, _.info512.LdFrag, 3423 masked_load, Sched.ZMM, "", 3424 NoRMPattern, SelectOprr>, EVEX_V512; 3425 3426 let Predicates = [prd, HasVLX] in { 3427 defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256, _.info256.LdFrag, 3428 masked_load, Sched.YMM, EVEX2VEXOvrd#"Y", 3429 NoRMPattern, SelectOprr>, EVEX_V256; 3430 defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128, _.info128.LdFrag, 3431 masked_load, Sched.XMM, EVEX2VEXOvrd, 3432 NoRMPattern, SelectOprr>, EVEX_V128; 3433 } 3434} 3435 3436multiclass avx512_store<bits<8> opc, string OpcodeStr, string BaseName, 3437 X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore, 3438 X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd, 3439 bit NoMRPattern = 0> { 3440 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in { 3441 let isMoveReg = 1 in 3442 def rr_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src), 3443 OpcodeStr # "\t{$src, $dst|$dst, $src}", 3444 [], _.ExeDomain>, EVEX, 3445 FoldGenData<BaseName#_.ZSuffix#rr>, Sched<[Sched.RR]>, 3446 EVEX2VEXOverride<EVEX2VEXOvrd#"rr_REV">; 3447 def rrk_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), 3448 (ins _.KRCWM:$mask, _.RC:$src), 3449 OpcodeStr # "\t{$src, ${dst} {${mask}}|"# 3450 "${dst} {${mask}}, $src}", 3451 [], _.ExeDomain>, EVEX, EVEX_K, 3452 FoldGenData<BaseName#_.ZSuffix#rrk>, 3453 Sched<[Sched.RR]>; 3454 def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), 3455 (ins _.KRCWM:$mask, _.RC:$src), 3456 OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" # 3457 "${dst} {${mask}} {z}, $src}", 3458 [], _.ExeDomain>, EVEX, EVEX_KZ, 3459 FoldGenData<BaseName#_.ZSuffix#rrkz>, 3460 Sched<[Sched.RR]>; 3461 } 3462 3463 let hasSideEffects = 0, mayStore = 1 in 3464 def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src), 3465 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 3466 !if(NoMRPattern, [], 3467 [(st_frag (_.VT _.RC:$src), addr:$dst)]), 3468 _.ExeDomain>, EVEX, Sched<[Sched.MR]>, 3469 EVEX2VEXOverride<EVEX2VEXOvrd#"mr">; 3470 def mrk : AVX512PI<opc, MRMDestMem, (outs), 3471 (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src), 3472 OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}", 3473 [], _.ExeDomain>, EVEX, EVEX_K, Sched<[Sched.MR]>, 3474 NotMemoryFoldable; 3475 3476 def: Pat<(mstore (_.VT _.RC:$src), addr:$ptr, _.KRCWM:$mask), 3477 (!cast<Instruction>(BaseName#_.ZSuffix#mrk) addr:$ptr, 3478 _.KRCWM:$mask, _.RC:$src)>; 3479 3480 def : InstAlias<OpcodeStr#".s\t{$src, $dst|$dst, $src}", 3481 (!cast<Instruction>(BaseName#_.ZSuffix#"rr_REV") 3482 _.RC:$dst, _.RC:$src), 0>; 3483 def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}", 3484 (!cast<Instruction>(BaseName#_.ZSuffix#"rrk_REV") 3485 _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>; 3486 def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}", 3487 (!cast<Instruction>(BaseName#_.ZSuffix#"rrkz_REV") 3488 _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>; 3489} 3490 3491multiclass avx512_store_vl< bits<8> opc, string OpcodeStr, 3492 AVX512VLVectorVTInfo _, Predicate prd, 3493 X86SchedWriteMoveLSWidths Sched, 3494 string EVEX2VEXOvrd, bit NoMRPattern = 0> { 3495 let Predicates = [prd] in 3496 defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, store, 3497 masked_store, Sched.ZMM, "", 3498 NoMRPattern>, EVEX_V512; 3499 let Predicates = [prd, HasVLX] in { 3500 defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, store, 3501 masked_store, Sched.YMM, 3502 EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256; 3503 defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, store, 3504 masked_store, Sched.XMM, EVEX2VEXOvrd, 3505 NoMRPattern>, EVEX_V128; 3506 } 3507} 3508 3509multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr, 3510 AVX512VLVectorVTInfo _, Predicate prd, 3511 X86SchedWriteMoveLSWidths Sched, 3512 string EVEX2VEXOvrd, bit NoMRPattern = 0> { 3513 let Predicates = [prd] in 3514 defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, alignedstore, 3515 masked_store_aligned, Sched.ZMM, "", 3516 NoMRPattern>, EVEX_V512; 3517 3518 let Predicates = [prd, HasVLX] in { 3519 defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, alignedstore, 3520 masked_store_aligned, Sched.YMM, 3521 EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256; 3522 defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, alignedstore, 3523 masked_store_aligned, Sched.XMM, EVEX2VEXOvrd, 3524 NoMRPattern>, EVEX_V128; 3525 } 3526} 3527 3528defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info, 3529 HasAVX512, SchedWriteFMoveLS, "VMOVAPS">, 3530 avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info, 3531 HasAVX512, SchedWriteFMoveLS, "VMOVAPS">, 3532 PS, EVEX_CD8<32, CD8VF>; 3533 3534defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info, 3535 HasAVX512, SchedWriteFMoveLS, "VMOVAPD">, 3536 avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info, 3537 HasAVX512, SchedWriteFMoveLS, "VMOVAPD">, 3538 PD, VEX_W, EVEX_CD8<64, CD8VF>; 3539 3540defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512, 3541 SchedWriteFMoveLS, "VMOVUPS", 0, null_frag>, 3542 avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512, 3543 SchedWriteFMoveLS, "VMOVUPS">, 3544 PS, EVEX_CD8<32, CD8VF>; 3545 3546defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512, 3547 SchedWriteFMoveLS, "VMOVUPD", 0, null_frag>, 3548 avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512, 3549 SchedWriteFMoveLS, "VMOVUPD">, 3550 PD, VEX_W, EVEX_CD8<64, CD8VF>; 3551 3552defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info, 3553 HasAVX512, SchedWriteVecMoveLS, 3554 "VMOVDQA", 1>, 3555 avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info, 3556 HasAVX512, SchedWriteVecMoveLS, 3557 "VMOVDQA", 1>, 3558 PD, EVEX_CD8<32, CD8VF>; 3559 3560defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info, 3561 HasAVX512, SchedWriteVecMoveLS, 3562 "VMOVDQA">, 3563 avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info, 3564 HasAVX512, SchedWriteVecMoveLS, 3565 "VMOVDQA">, 3566 PD, VEX_W, EVEX_CD8<64, CD8VF>; 3567 3568defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI, 3569 SchedWriteVecMoveLS, "VMOVDQU", 1>, 3570 avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, HasBWI, 3571 SchedWriteVecMoveLS, "VMOVDQU", 1>, 3572 XD, EVEX_CD8<8, CD8VF>; 3573 3574defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI, 3575 SchedWriteVecMoveLS, "VMOVDQU", 1>, 3576 avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, HasBWI, 3577 SchedWriteVecMoveLS, "VMOVDQU", 1>, 3578 XD, VEX_W, EVEX_CD8<16, CD8VF>; 3579 3580defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512, 3581 SchedWriteVecMoveLS, "VMOVDQU", 1, null_frag>, 3582 avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, HasAVX512, 3583 SchedWriteVecMoveLS, "VMOVDQU", 1>, 3584 XS, EVEX_CD8<32, CD8VF>; 3585 3586defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512, 3587 SchedWriteVecMoveLS, "VMOVDQU", 0, null_frag>, 3588 avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, HasAVX512, 3589 SchedWriteVecMoveLS, "VMOVDQU">, 3590 XS, VEX_W, EVEX_CD8<64, CD8VF>; 3591 3592// Special instructions to help with spilling when we don't have VLX. We need 3593// to load or store from a ZMM register instead. These are converted in 3594// expandPostRAPseudos. 3595let isReMaterializable = 1, canFoldAsLoad = 1, 3596 isPseudo = 1, mayLoad = 1, hasSideEffects = 0 in { 3597def VMOVAPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src), 3598 "", []>, Sched<[WriteFLoadX]>; 3599def VMOVAPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src), 3600 "", []>, Sched<[WriteFLoadY]>; 3601def VMOVUPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src), 3602 "", []>, Sched<[WriteFLoadX]>; 3603def VMOVUPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src), 3604 "", []>, Sched<[WriteFLoadY]>; 3605} 3606 3607let isPseudo = 1, mayStore = 1, hasSideEffects = 0 in { 3608def VMOVAPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src), 3609 "", []>, Sched<[WriteFStoreX]>; 3610def VMOVAPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src), 3611 "", []>, Sched<[WriteFStoreY]>; 3612def VMOVUPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src), 3613 "", []>, Sched<[WriteFStoreX]>; 3614def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src), 3615 "", []>, Sched<[WriteFStoreY]>; 3616} 3617 3618def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 immAllZerosV), 3619 (v8i64 VR512:$src))), 3620 (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)), 3621 VK8), VR512:$src)>; 3622 3623def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV), 3624 (v16i32 VR512:$src))), 3625 (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>; 3626 3627// These patterns exist to prevent the above patterns from introducing a second 3628// mask inversion when one already exists. 3629def : Pat<(v8i64 (vselect (v8i1 (vnot VK8:$mask)), 3630 (v8i64 immAllZerosV), 3631 (v8i64 VR512:$src))), 3632 (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>; 3633def : Pat<(v16i32 (vselect (v16i1 (vnot VK16:$mask)), 3634 (v16i32 immAllZerosV), 3635 (v16i32 VR512:$src))), 3636 (VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>; 3637 3638multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow, 3639 X86VectorVTInfo Wide> { 3640 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask), 3641 Narrow.RC:$src1, Narrow.RC:$src0)), 3642 (EXTRACT_SUBREG 3643 (Wide.VT 3644 (!cast<Instruction>(InstrStr#"rrk") 3645 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src0, Narrow.SubRegIdx)), 3646 (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM), 3647 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))), 3648 Narrow.SubRegIdx)>; 3649 3650 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask), 3651 Narrow.RC:$src1, Narrow.ImmAllZerosV)), 3652 (EXTRACT_SUBREG 3653 (Wide.VT 3654 (!cast<Instruction>(InstrStr#"rrkz") 3655 (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM), 3656 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))), 3657 Narrow.SubRegIdx)>; 3658} 3659 3660// Patterns for handling v8i1 selects of 256-bit vectors when VLX isn't 3661// available. Use a 512-bit operation and extract. 3662let Predicates = [HasAVX512, NoVLX] in { 3663 defm : mask_move_lowering<"VMOVAPSZ", v4f32x_info, v16f32_info>; 3664 defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>; 3665 defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>; 3666 defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>; 3667 3668 defm : mask_move_lowering<"VMOVAPDZ", v2f64x_info, v8f64_info>; 3669 defm : mask_move_lowering<"VMOVDQA64Z", v2i64x_info, v8i64_info>; 3670 defm : mask_move_lowering<"VMOVAPDZ", v4f64x_info, v8f64_info>; 3671 defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>; 3672} 3673 3674let Predicates = [HasBWI, NoVLX] in { 3675 defm : mask_move_lowering<"VMOVDQU8Z", v16i8x_info, v64i8_info>; 3676 defm : mask_move_lowering<"VMOVDQU8Z", v32i8x_info, v64i8_info>; 3677 3678 defm : mask_move_lowering<"VMOVDQU16Z", v8i16x_info, v32i16_info>; 3679 defm : mask_move_lowering<"VMOVDQU16Z", v16i16x_info, v32i16_info>; 3680} 3681 3682let Predicates = [HasAVX512] in { 3683 // 512-bit load. 3684 def : Pat<(alignedloadv16i32 addr:$src), 3685 (VMOVDQA64Zrm addr:$src)>; 3686 def : Pat<(alignedloadv32i16 addr:$src), 3687 (VMOVDQA64Zrm addr:$src)>; 3688 def : Pat<(alignedloadv64i8 addr:$src), 3689 (VMOVDQA64Zrm addr:$src)>; 3690 def : Pat<(loadv16i32 addr:$src), 3691 (VMOVDQU64Zrm addr:$src)>; 3692 def : Pat<(loadv32i16 addr:$src), 3693 (VMOVDQU64Zrm addr:$src)>; 3694 def : Pat<(loadv64i8 addr:$src), 3695 (VMOVDQU64Zrm addr:$src)>; 3696 3697 // 512-bit store. 3698 def : Pat<(alignedstore (v16i32 VR512:$src), addr:$dst), 3699 (VMOVDQA64Zmr addr:$dst, VR512:$src)>; 3700 def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst), 3701 (VMOVDQA64Zmr addr:$dst, VR512:$src)>; 3702 def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst), 3703 (VMOVDQA64Zmr addr:$dst, VR512:$src)>; 3704 def : Pat<(store (v16i32 VR512:$src), addr:$dst), 3705 (VMOVDQU64Zmr addr:$dst, VR512:$src)>; 3706 def : Pat<(store (v32i16 VR512:$src), addr:$dst), 3707 (VMOVDQU64Zmr addr:$dst, VR512:$src)>; 3708 def : Pat<(store (v64i8 VR512:$src), addr:$dst), 3709 (VMOVDQU64Zmr addr:$dst, VR512:$src)>; 3710} 3711 3712let Predicates = [HasVLX] in { 3713 // 128-bit load. 3714 def : Pat<(alignedloadv4i32 addr:$src), 3715 (VMOVDQA64Z128rm addr:$src)>; 3716 def : Pat<(alignedloadv8i16 addr:$src), 3717 (VMOVDQA64Z128rm addr:$src)>; 3718 def : Pat<(alignedloadv16i8 addr:$src), 3719 (VMOVDQA64Z128rm addr:$src)>; 3720 def : Pat<(loadv4i32 addr:$src), 3721 (VMOVDQU64Z128rm addr:$src)>; 3722 def : Pat<(loadv8i16 addr:$src), 3723 (VMOVDQU64Z128rm addr:$src)>; 3724 def : Pat<(loadv16i8 addr:$src), 3725 (VMOVDQU64Z128rm addr:$src)>; 3726 3727 // 128-bit store. 3728 def : Pat<(alignedstore (v4i32 VR128X:$src), addr:$dst), 3729 (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>; 3730 def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst), 3731 (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>; 3732 def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst), 3733 (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>; 3734 def : Pat<(store (v4i32 VR128X:$src), addr:$dst), 3735 (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>; 3736 def : Pat<(store (v8i16 VR128X:$src), addr:$dst), 3737 (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>; 3738 def : Pat<(store (v16i8 VR128X:$src), addr:$dst), 3739 (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>; 3740 3741 // 256-bit load. 3742 def : Pat<(alignedloadv8i32 addr:$src), 3743 (VMOVDQA64Z256rm addr:$src)>; 3744 def : Pat<(alignedloadv16i16 addr:$src), 3745 (VMOVDQA64Z256rm addr:$src)>; 3746 def : Pat<(alignedloadv32i8 addr:$src), 3747 (VMOVDQA64Z256rm addr:$src)>; 3748 def : Pat<(loadv8i32 addr:$src), 3749 (VMOVDQU64Z256rm addr:$src)>; 3750 def : Pat<(loadv16i16 addr:$src), 3751 (VMOVDQU64Z256rm addr:$src)>; 3752 def : Pat<(loadv32i8 addr:$src), 3753 (VMOVDQU64Z256rm addr:$src)>; 3754 3755 // 256-bit store. 3756 def : Pat<(alignedstore (v8i32 VR256X:$src), addr:$dst), 3757 (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>; 3758 def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst), 3759 (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>; 3760 def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst), 3761 (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>; 3762 def : Pat<(store (v8i32 VR256X:$src), addr:$dst), 3763 (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>; 3764 def : Pat<(store (v16i16 VR256X:$src), addr:$dst), 3765 (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>; 3766 def : Pat<(store (v32i8 VR256X:$src), addr:$dst), 3767 (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>; 3768} 3769 3770// Move Int Doubleword to Packed Double Int 3771// 3772let ExeDomain = SSEPackedInt in { 3773def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src), 3774 "vmovd\t{$src, $dst|$dst, $src}", 3775 [(set VR128X:$dst, 3776 (v4i32 (scalar_to_vector GR32:$src)))]>, 3777 EVEX, Sched<[WriteVecMoveFromGpr]>; 3778def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src), 3779 "vmovd\t{$src, $dst|$dst, $src}", 3780 [(set VR128X:$dst, 3781 (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>, 3782 EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>; 3783def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src), 3784 "vmovq\t{$src, $dst|$dst, $src}", 3785 [(set VR128X:$dst, 3786 (v2i64 (scalar_to_vector GR64:$src)))]>, 3787 EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>; 3788let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in 3789def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), 3790 (ins i64mem:$src), 3791 "vmovq\t{$src, $dst|$dst, $src}", []>, 3792 EVEX, VEX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecLoad]>; 3793let isCodeGenOnly = 1 in { 3794def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src), 3795 "vmovq\t{$src, $dst|$dst, $src}", 3796 [(set FR64X:$dst, (bitconvert GR64:$src))]>, 3797 EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>; 3798def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src), 3799 "vmovq\t{$src, $dst|$dst, $src}", 3800 [(set GR64:$dst, (bitconvert FR64X:$src))]>, 3801 EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>; 3802} 3803} // ExeDomain = SSEPackedInt 3804 3805// Move Int Doubleword to Single Scalar 3806// 3807let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in { 3808def VMOVDI2SSZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src), 3809 "vmovd\t{$src, $dst|$dst, $src}", 3810 [(set FR32X:$dst, (bitconvert GR32:$src))]>, 3811 EVEX, Sched<[WriteVecMoveFromGpr]>; 3812} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1 3813 3814// Move doubleword from xmm register to r/m32 3815// 3816let ExeDomain = SSEPackedInt in { 3817def VMOVPDI2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src), 3818 "vmovd\t{$src, $dst|$dst, $src}", 3819 [(set GR32:$dst, (extractelt (v4i32 VR128X:$src), 3820 (iPTR 0)))]>, 3821 EVEX, Sched<[WriteVecMoveToGpr]>; 3822def VMOVPDI2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs), 3823 (ins i32mem:$dst, VR128X:$src), 3824 "vmovd\t{$src, $dst|$dst, $src}", 3825 [(store (i32 (extractelt (v4i32 VR128X:$src), 3826 (iPTR 0))), addr:$dst)]>, 3827 EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>; 3828} // ExeDomain = SSEPackedInt 3829 3830// Move quadword from xmm1 register to r/m64 3831// 3832let ExeDomain = SSEPackedInt in { 3833def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src), 3834 "vmovq\t{$src, $dst|$dst, $src}", 3835 [(set GR64:$dst, (extractelt (v2i64 VR128X:$src), 3836 (iPTR 0)))]>, 3837 PD, EVEX, VEX_W, Sched<[WriteVecMoveToGpr]>, 3838 Requires<[HasAVX512]>; 3839 3840let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in 3841def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src), 3842 "vmovq\t{$src, $dst|$dst, $src}", []>, PD, 3843 EVEX, VEX_W, Sched<[WriteVecStore]>, 3844 Requires<[HasAVX512, In64BitMode]>; 3845 3846def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs), 3847 (ins i64mem:$dst, VR128X:$src), 3848 "vmovq\t{$src, $dst|$dst, $src}", 3849 [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)), 3850 addr:$dst)]>, 3851 EVEX, PD, VEX_W, EVEX_CD8<64, CD8VT1>, 3852 Sched<[WriteVecStore]>, Requires<[HasAVX512]>; 3853 3854let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in 3855def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst), 3856 (ins VR128X:$src), 3857 "vmovq\t{$src, $dst|$dst, $src}", []>, 3858 EVEX, VEX_W, Sched<[SchedWriteVecLogic.XMM]>; 3859} // ExeDomain = SSEPackedInt 3860 3861def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}", 3862 (VMOVPQI2QIZrr VR128X:$dst, VR128X:$src), 0>; 3863 3864let Predicates = [HasAVX512] in { 3865 def : Pat<(X86vextractstore64 (v2i64 VR128X:$src), addr:$dst), 3866 (VMOVPQI2QIZmr addr:$dst, VR128X:$src)>; 3867} 3868 3869// Move Scalar Single to Double Int 3870// 3871let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in { 3872def VMOVSS2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), 3873 (ins FR32X:$src), 3874 "vmovd\t{$src, $dst|$dst, $src}", 3875 [(set GR32:$dst, (bitconvert FR32X:$src))]>, 3876 EVEX, Sched<[WriteVecMoveToGpr]>; 3877} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1 3878 3879// Move Quadword Int to Packed Quadword Int 3880// 3881let ExeDomain = SSEPackedInt in { 3882def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst), 3883 (ins i64mem:$src), 3884 "vmovq\t{$src, $dst|$dst, $src}", 3885 [(set VR128X:$dst, 3886 (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, 3887 EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>; 3888} // ExeDomain = SSEPackedInt 3889 3890// Allow "vmovd" but print "vmovq". 3891def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}", 3892 (VMOV64toPQIZrr VR128X:$dst, GR64:$src), 0>; 3893def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}", 3894 (VMOVPQIto64Zrr GR64:$dst, VR128X:$src), 0>; 3895 3896// Conversions between masks and scalar fp. 3897def : Pat<(v32i1 (bitconvert FR32X:$src)), 3898 (KMOVDkr (VMOVSS2DIZrr FR32X:$src))>; 3899def : Pat<(f32 (bitconvert VK32:$src)), 3900 (VMOVDI2SSZrr (KMOVDrk VK32:$src))>; 3901 3902def : Pat<(v64i1 (bitconvert FR64X:$src)), 3903 (KMOVQkr (VMOVSDto64Zrr FR64X:$src))>; 3904def : Pat<(f64 (bitconvert VK64:$src)), 3905 (VMOV64toSDZrr (KMOVQrk VK64:$src))>; 3906 3907//===----------------------------------------------------------------------===// 3908// AVX-512 MOVSS, MOVSD 3909//===----------------------------------------------------------------------===// 3910 3911multiclass avx512_move_scalar<string asm, SDNode OpNode, PatFrag vzload_frag, 3912 X86VectorVTInfo _> { 3913 let Predicates = [HasAVX512, OptForSize] in 3914 def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst), 3915 (ins _.RC:$src1, _.RC:$src2), 3916 !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 3917 [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2)))], 3918 _.ExeDomain>, EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>; 3919 def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst), 3920 (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2), 3921 !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|", 3922 "$dst {${mask}} {z}, $src1, $src2}"), 3923 [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask, 3924 (_.VT (OpNode _.RC:$src1, _.RC:$src2)), 3925 _.ImmAllZerosV)))], 3926 _.ExeDomain>, EVEX_4V, EVEX_KZ, Sched<[SchedWriteFShuffle.XMM]>; 3927 let Constraints = "$src0 = $dst" in 3928 def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst), 3929 (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2), 3930 !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|", 3931 "$dst {${mask}}, $src1, $src2}"), 3932 [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask, 3933 (_.VT (OpNode _.RC:$src1, _.RC:$src2)), 3934 (_.VT _.RC:$src0))))], 3935 _.ExeDomain>, EVEX_4V, EVEX_K, Sched<[SchedWriteFShuffle.XMM]>; 3936 let canFoldAsLoad = 1, isReMaterializable = 1 in { 3937 def rm : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst), (ins _.ScalarMemOp:$src), 3938 !strconcat(asm, "\t{$src, $dst|$dst, $src}"), 3939 [(set _.RC:$dst, (_.VT (vzload_frag addr:$src)))], 3940 _.ExeDomain>, EVEX, Sched<[WriteFLoad]>; 3941 // _alt version uses FR32/FR64 register class. 3942 let isCodeGenOnly = 1 in 3943 def rm_alt : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src), 3944 !strconcat(asm, "\t{$src, $dst|$dst, $src}"), 3945 [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))], 3946 _.ExeDomain>, EVEX, Sched<[WriteFLoad]>; 3947 } 3948 let mayLoad = 1, hasSideEffects = 0 in { 3949 let Constraints = "$src0 = $dst" in 3950 def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst), 3951 (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src), 3952 !strconcat(asm, "\t{$src, $dst {${mask}}|", 3953 "$dst {${mask}}, $src}"), 3954 [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFLoad]>; 3955 def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst), 3956 (ins _.KRCWM:$mask, _.ScalarMemOp:$src), 3957 !strconcat(asm, "\t{$src, $dst {${mask}} {z}|", 3958 "$dst {${mask}} {z}, $src}"), 3959 [], _.ExeDomain>, EVEX, EVEX_KZ, Sched<[WriteFLoad]>; 3960 } 3961 def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src), 3962 !strconcat(asm, "\t{$src, $dst|$dst, $src}"), 3963 [(store _.FRC:$src, addr:$dst)], _.ExeDomain>, 3964 EVEX, Sched<[WriteFStore]>; 3965 let mayStore = 1, hasSideEffects = 0 in 3966 def mrk: AVX512PI<0x11, MRMDestMem, (outs), 3967 (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.RC:$src), 3968 !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"), 3969 [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFStore]>, 3970 NotMemoryFoldable; 3971} 3972 3973defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, X86vzload32, f32x_info>, 3974 VEX_LIG, XS, EVEX_CD8<32, CD8VT1>; 3975 3976defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, X86vzload64, f64x_info>, 3977 VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>; 3978 3979 3980multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode, 3981 PatLeaf ZeroFP, X86VectorVTInfo _> { 3982 3983def : Pat<(_.VT (OpNode _.RC:$src0, 3984 (_.VT (scalar_to_vector 3985 (_.EltVT (X86selects VK1WM:$mask, 3986 (_.EltVT _.FRC:$src1), 3987 (_.EltVT _.FRC:$src2))))))), 3988 (!cast<Instruction>(InstrStr#rrk) 3989 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, _.RC)), 3990 VK1WM:$mask, 3991 (_.VT _.RC:$src0), 3992 (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>; 3993 3994def : Pat<(_.VT (OpNode _.RC:$src0, 3995 (_.VT (scalar_to_vector 3996 (_.EltVT (X86selects VK1WM:$mask, 3997 (_.EltVT _.FRC:$src1), 3998 (_.EltVT ZeroFP))))))), 3999 (!cast<Instruction>(InstrStr#rrkz) 4000 VK1WM:$mask, 4001 (_.VT _.RC:$src0), 4002 (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>; 4003} 4004 4005multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _, 4006 dag Mask, RegisterClass MaskRC> { 4007 4008def : Pat<(masked_store 4009 (_.info512.VT (insert_subvector undef, 4010 (_.info128.VT _.info128.RC:$src), 4011 (iPTR 0))), addr:$dst, Mask), 4012 (!cast<Instruction>(InstrStr#mrk) addr:$dst, 4013 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM), 4014 _.info128.RC:$src)>; 4015 4016} 4017 4018multiclass avx512_store_scalar_lowering_subreg<string InstrStr, 4019 AVX512VLVectorVTInfo _, 4020 dag Mask, RegisterClass MaskRC, 4021 SubRegIndex subreg> { 4022 4023def : Pat<(masked_store 4024 (_.info512.VT (insert_subvector undef, 4025 (_.info128.VT _.info128.RC:$src), 4026 (iPTR 0))), addr:$dst, Mask), 4027 (!cast<Instruction>(InstrStr#mrk) addr:$dst, 4028 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), 4029 _.info128.RC:$src)>; 4030 4031} 4032 4033// This matches the more recent codegen from clang that avoids emitting a 512 4034// bit masked store directly. Codegen will widen 128-bit masked store to 512 4035// bits on AVX512F only targets. 4036multiclass avx512_store_scalar_lowering_subreg2<string InstrStr, 4037 AVX512VLVectorVTInfo _, 4038 dag Mask512, dag Mask128, 4039 RegisterClass MaskRC, 4040 SubRegIndex subreg> { 4041 4042// AVX512F pattern. 4043def : Pat<(masked_store 4044 (_.info512.VT (insert_subvector undef, 4045 (_.info128.VT _.info128.RC:$src), 4046 (iPTR 0))), addr:$dst, Mask512), 4047 (!cast<Instruction>(InstrStr#mrk) addr:$dst, 4048 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), 4049 _.info128.RC:$src)>; 4050 4051// AVX512VL pattern. 4052def : Pat<(masked_store (_.info128.VT _.info128.RC:$src), addr:$dst, Mask128), 4053 (!cast<Instruction>(InstrStr#mrk) addr:$dst, 4054 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), 4055 _.info128.RC:$src)>; 4056} 4057 4058multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _, 4059 dag Mask, RegisterClass MaskRC> { 4060 4061def : Pat<(_.info128.VT (extract_subvector 4062 (_.info512.VT (masked_load addr:$srcAddr, Mask, 4063 _.info512.ImmAllZerosV)), 4064 (iPTR 0))), 4065 (!cast<Instruction>(InstrStr#rmkz) 4066 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM), 4067 addr:$srcAddr)>; 4068 4069def : Pat<(_.info128.VT (extract_subvector 4070 (_.info512.VT (masked_load addr:$srcAddr, Mask, 4071 (_.info512.VT (insert_subvector undef, 4072 (_.info128.VT (X86vzmovl _.info128.RC:$src)), 4073 (iPTR 0))))), 4074 (iPTR 0))), 4075 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src, 4076 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM), 4077 addr:$srcAddr)>; 4078 4079} 4080 4081multiclass avx512_load_scalar_lowering_subreg<string InstrStr, 4082 AVX512VLVectorVTInfo _, 4083 dag Mask, RegisterClass MaskRC, 4084 SubRegIndex subreg> { 4085 4086def : Pat<(_.info128.VT (extract_subvector 4087 (_.info512.VT (masked_load addr:$srcAddr, Mask, 4088 _.info512.ImmAllZerosV)), 4089 (iPTR 0))), 4090 (!cast<Instruction>(InstrStr#rmkz) 4091 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), 4092 addr:$srcAddr)>; 4093 4094def : Pat<(_.info128.VT (extract_subvector 4095 (_.info512.VT (masked_load addr:$srcAddr, Mask, 4096 (_.info512.VT (insert_subvector undef, 4097 (_.info128.VT (X86vzmovl _.info128.RC:$src)), 4098 (iPTR 0))))), 4099 (iPTR 0))), 4100 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src, 4101 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), 4102 addr:$srcAddr)>; 4103 4104} 4105 4106// This matches the more recent codegen from clang that avoids emitting a 512 4107// bit masked load directly. Codegen will widen 128-bit masked load to 512 4108// bits on AVX512F only targets. 4109multiclass avx512_load_scalar_lowering_subreg2<string InstrStr, 4110 AVX512VLVectorVTInfo _, 4111 dag Mask512, dag Mask128, 4112 RegisterClass MaskRC, 4113 SubRegIndex subreg> { 4114// AVX512F patterns. 4115def : Pat<(_.info128.VT (extract_subvector 4116 (_.info512.VT (masked_load addr:$srcAddr, Mask512, 4117 _.info512.ImmAllZerosV)), 4118 (iPTR 0))), 4119 (!cast<Instruction>(InstrStr#rmkz) 4120 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), 4121 addr:$srcAddr)>; 4122 4123def : Pat<(_.info128.VT (extract_subvector 4124 (_.info512.VT (masked_load addr:$srcAddr, Mask512, 4125 (_.info512.VT (insert_subvector undef, 4126 (_.info128.VT (X86vzmovl _.info128.RC:$src)), 4127 (iPTR 0))))), 4128 (iPTR 0))), 4129 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src, 4130 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), 4131 addr:$srcAddr)>; 4132 4133// AVX512Vl patterns. 4134def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128, 4135 _.info128.ImmAllZerosV)), 4136 (!cast<Instruction>(InstrStr#rmkz) 4137 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), 4138 addr:$srcAddr)>; 4139 4140def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128, 4141 (_.info128.VT (X86vzmovl _.info128.RC:$src)))), 4142 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src, 4143 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), 4144 addr:$srcAddr)>; 4145} 4146 4147defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>; 4148defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>; 4149 4150defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info, 4151 (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>; 4152defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info, 4153 (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>; 4154defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info, 4155 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>; 4156 4157defm : avx512_store_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info, 4158 (v16i1 (insert_subvector 4159 (v16i1 immAllZerosV), 4160 (v4i1 (extract_subvector 4161 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))), 4162 (iPTR 0))), 4163 (iPTR 0))), 4164 (v4i1 (extract_subvector 4165 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))), 4166 (iPTR 0))), GR8, sub_8bit>; 4167defm : avx512_store_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info, 4168 (v8i1 4169 (extract_subvector 4170 (v16i1 4171 (insert_subvector 4172 (v16i1 immAllZerosV), 4173 (v2i1 (extract_subvector 4174 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), 4175 (iPTR 0))), 4176 (iPTR 0))), 4177 (iPTR 0))), 4178 (v2i1 (extract_subvector 4179 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), 4180 (iPTR 0))), GR8, sub_8bit>; 4181 4182defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info, 4183 (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>; 4184defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info, 4185 (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>; 4186defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info, 4187 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>; 4188 4189defm : avx512_load_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info, 4190 (v16i1 (insert_subvector 4191 (v16i1 immAllZerosV), 4192 (v4i1 (extract_subvector 4193 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))), 4194 (iPTR 0))), 4195 (iPTR 0))), 4196 (v4i1 (extract_subvector 4197 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))), 4198 (iPTR 0))), GR8, sub_8bit>; 4199defm : avx512_load_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info, 4200 (v8i1 4201 (extract_subvector 4202 (v16i1 4203 (insert_subvector 4204 (v16i1 immAllZerosV), 4205 (v2i1 (extract_subvector 4206 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), 4207 (iPTR 0))), 4208 (iPTR 0))), 4209 (iPTR 0))), 4210 (v2i1 (extract_subvector 4211 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), 4212 (iPTR 0))), GR8, sub_8bit>; 4213 4214def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))), 4215 (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrk 4216 (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)), 4217 VK1WM:$mask, (v4f32 (IMPLICIT_DEF)), 4218 (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>; 4219 4220def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), fp32imm0)), 4221 (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrkz VK1WM:$mask, (v4f32 (IMPLICIT_DEF)), 4222 (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>; 4223 4224def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), (f32 FR32X:$src0))), 4225 (COPY_TO_REGCLASS 4226 (v4f32 (VMOVSSZrmk (v4f32 (COPY_TO_REGCLASS FR32X:$src0, VR128X)), 4227 VK1WM:$mask, addr:$src)), 4228 FR32X)>; 4229def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), fp32imm0)), 4230 (COPY_TO_REGCLASS (v4f32 (VMOVSSZrmkz VK1WM:$mask, addr:$src)), FR32X)>; 4231 4232def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))), 4233 (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrk 4234 (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)), 4235 VK1WM:$mask, (v2f64 (IMPLICIT_DEF)), 4236 (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>; 4237 4238def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fp64imm0)), 4239 (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrkz VK1WM:$mask, (v2f64 (IMPLICIT_DEF)), 4240 (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>; 4241 4242def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), (f64 FR64X:$src0))), 4243 (COPY_TO_REGCLASS 4244 (v2f64 (VMOVSDZrmk (v2f64 (COPY_TO_REGCLASS FR64X:$src0, VR128X)), 4245 VK1WM:$mask, addr:$src)), 4246 FR64X)>; 4247def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), fp64imm0)), 4248 (COPY_TO_REGCLASS (v2f64 (VMOVSDZrmkz VK1WM:$mask, addr:$src)), FR64X)>; 4249 4250 4251def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 VR128X:$src2))), 4252 (VMOVSSZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>; 4253def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 VR128X:$src2))), 4254 (VMOVSDZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>; 4255 4256def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 immAllZerosV))), 4257 (VMOVSSZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>; 4258def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 immAllZerosV))), 4259 (VMOVSDZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>; 4260 4261let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in { 4262 def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), 4263 (ins VR128X:$src1, VR128X:$src2), 4264 "vmovss\t{$src2, $src1, $dst|$dst, $src1, $src2}", 4265 []>, XS, EVEX_4V, VEX_LIG, 4266 FoldGenData<"VMOVSSZrr">, 4267 Sched<[SchedWriteFShuffle.XMM]>; 4268 4269 let Constraints = "$src0 = $dst" in 4270 def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), 4271 (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask, 4272 VR128X:$src1, VR128X:$src2), 4273 "vmovss\t{$src2, $src1, $dst {${mask}}|"# 4274 "$dst {${mask}}, $src1, $src2}", 4275 []>, EVEX_K, XS, EVEX_4V, VEX_LIG, 4276 FoldGenData<"VMOVSSZrrk">, 4277 Sched<[SchedWriteFShuffle.XMM]>; 4278 4279 def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), 4280 (ins f32x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2), 4281 "vmovss\t{$src2, $src1, $dst {${mask}} {z}|"# 4282 "$dst {${mask}} {z}, $src1, $src2}", 4283 []>, EVEX_KZ, XS, EVEX_4V, VEX_LIG, 4284 FoldGenData<"VMOVSSZrrkz">, 4285 Sched<[SchedWriteFShuffle.XMM]>; 4286 4287 def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), 4288 (ins VR128X:$src1, VR128X:$src2), 4289 "vmovsd\t{$src2, $src1, $dst|$dst, $src1, $src2}", 4290 []>, XD, EVEX_4V, VEX_LIG, VEX_W, 4291 FoldGenData<"VMOVSDZrr">, 4292 Sched<[SchedWriteFShuffle.XMM]>; 4293 4294 let Constraints = "$src0 = $dst" in 4295 def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), 4296 (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask, 4297 VR128X:$src1, VR128X:$src2), 4298 "vmovsd\t{$src2, $src1, $dst {${mask}}|"# 4299 "$dst {${mask}}, $src1, $src2}", 4300 []>, EVEX_K, XD, EVEX_4V, VEX_LIG, 4301 VEX_W, FoldGenData<"VMOVSDZrrk">, 4302 Sched<[SchedWriteFShuffle.XMM]>; 4303 4304 def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), 4305 (ins f64x_info.KRCWM:$mask, VR128X:$src1, 4306 VR128X:$src2), 4307 "vmovsd\t{$src2, $src1, $dst {${mask}} {z}|"# 4308 "$dst {${mask}} {z}, $src1, $src2}", 4309 []>, EVEX_KZ, XD, EVEX_4V, VEX_LIG, 4310 VEX_W, FoldGenData<"VMOVSDZrrkz">, 4311 Sched<[SchedWriteFShuffle.XMM]>; 4312} 4313 4314def : InstAlias<"vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}", 4315 (VMOVSSZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>; 4316def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}}|"# 4317 "$dst {${mask}}, $src1, $src2}", 4318 (VMOVSSZrrk_REV VR128X:$dst, VK1WM:$mask, 4319 VR128X:$src1, VR128X:$src2), 0>; 4320def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"# 4321 "$dst {${mask}} {z}, $src1, $src2}", 4322 (VMOVSSZrrkz_REV VR128X:$dst, VK1WM:$mask, 4323 VR128X:$src1, VR128X:$src2), 0>; 4324def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}", 4325 (VMOVSDZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>; 4326def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}}|"# 4327 "$dst {${mask}}, $src1, $src2}", 4328 (VMOVSDZrrk_REV VR128X:$dst, VK1WM:$mask, 4329 VR128X:$src1, VR128X:$src2), 0>; 4330def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"# 4331 "$dst {${mask}} {z}, $src1, $src2}", 4332 (VMOVSDZrrkz_REV VR128X:$dst, VK1WM:$mask, 4333 VR128X:$src1, VR128X:$src2), 0>; 4334 4335let Predicates = [HasAVX512, OptForSize] in { 4336 def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))), 4337 (VMOVSSZrr (v4f32 (AVX512_128_SET0)), VR128X:$src)>; 4338 def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))), 4339 (VMOVSSZrr (v4i32 (AVX512_128_SET0)), VR128X:$src)>; 4340 4341 // Move low f32 and clear high bits. 4342 def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))), 4343 (SUBREG_TO_REG (i32 0), 4344 (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)), 4345 (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))), sub_xmm)>; 4346 def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))), 4347 (SUBREG_TO_REG (i32 0), 4348 (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)), 4349 (v4i32 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)))), sub_xmm)>; 4350 4351 def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))), 4352 (SUBREG_TO_REG (i32 0), 4353 (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)), 4354 (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))), sub_xmm)>; 4355 def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))), 4356 (SUBREG_TO_REG (i32 0), 4357 (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)), 4358 (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)))), sub_xmm)>; 4359} 4360 4361// Use 128-bit blends for OptForSpeed since BLENDs have better throughput than 4362// VMOVSS/SD. Unfortunately, loses the ability to use XMM16-31. 4363let Predicates = [HasAVX512, OptForSpeed] in { 4364 def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))), 4365 (SUBREG_TO_REG (i32 0), 4366 (v4f32 (VBLENDPSrri (v4f32 (V_SET0)), 4367 (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)), 4368 (i8 1))), sub_xmm)>; 4369 def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))), 4370 (SUBREG_TO_REG (i32 0), 4371 (v4i32 (VPBLENDWrri (v4i32 (V_SET0)), 4372 (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)), 4373 (i8 3))), sub_xmm)>; 4374} 4375 4376let Predicates = [HasAVX512] in { 4377 def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))), 4378 (VMOVSSZrm addr:$src)>; 4379 def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))), 4380 (VMOVSDZrm addr:$src)>; 4381 4382 // Represent the same patterns above but in the form they appear for 4383 // 256-bit types 4384 def : Pat<(v8f32 (X86vzload32 addr:$src)), 4385 (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>; 4386 def : Pat<(v4f64 (X86vzload64 addr:$src)), 4387 (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>; 4388 4389 // Represent the same patterns above but in the form they appear for 4390 // 512-bit types 4391 def : Pat<(v16f32 (X86vzload32 addr:$src)), 4392 (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>; 4393 def : Pat<(v8f64 (X86vzload64 addr:$src)), 4394 (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>; 4395} 4396 4397let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in { 4398def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst), 4399 (ins VR128X:$src), 4400 "vmovq\t{$src, $dst|$dst, $src}", 4401 [(set VR128X:$dst, (v2i64 (X86vzmovl 4402 (v2i64 VR128X:$src))))]>, 4403 EVEX, VEX_W; 4404} 4405 4406let Predicates = [HasAVX512] in { 4407 def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))), 4408 (VMOVDI2PDIZrr GR32:$src)>; 4409 4410 def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))), 4411 (VMOV64toPQIZrr GR64:$src)>; 4412 4413 // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part. 4414 def : Pat<(v4i32 (X86vzload32 addr:$src)), 4415 (VMOVDI2PDIZrm addr:$src)>; 4416 def : Pat<(v8i32 (X86vzload32 addr:$src)), 4417 (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>; 4418 def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))), 4419 (VMOVZPQILo2PQIZrr VR128X:$src)>; 4420 def : Pat<(v2i64 (X86vzload64 addr:$src)), 4421 (VMOVQI2PQIZrm addr:$src)>; 4422 def : Pat<(v4i64 (X86vzload64 addr:$src)), 4423 (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>; 4424 4425 // Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext. 4426 def : Pat<(v16i32 (X86vzload32 addr:$src)), 4427 (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>; 4428 def : Pat<(v8i64 (X86vzload64 addr:$src)), 4429 (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>; 4430 4431 def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))), 4432 (SUBREG_TO_REG (i32 0), 4433 (v2f64 (VMOVZPQILo2PQIZrr 4434 (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))), 4435 sub_xmm)>; 4436 def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))), 4437 (SUBREG_TO_REG (i32 0), 4438 (v2i64 (VMOVZPQILo2PQIZrr 4439 (v2i64 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)))), 4440 sub_xmm)>; 4441 4442 def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))), 4443 (SUBREG_TO_REG (i32 0), 4444 (v2f64 (VMOVZPQILo2PQIZrr 4445 (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))), 4446 sub_xmm)>; 4447 def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))), 4448 (SUBREG_TO_REG (i32 0), 4449 (v2i64 (VMOVZPQILo2PQIZrr 4450 (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)))), 4451 sub_xmm)>; 4452} 4453 4454//===----------------------------------------------------------------------===// 4455// AVX-512 - Non-temporals 4456//===----------------------------------------------------------------------===// 4457 4458def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst), 4459 (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}", 4460 [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.ZMM.RM]>, 4461 EVEX, T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>; 4462 4463let Predicates = [HasVLX] in { 4464 def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst), 4465 (ins i256mem:$src), 4466 "vmovntdqa\t{$src, $dst|$dst, $src}", 4467 [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.YMM.RM]>, 4468 EVEX, T8PD, EVEX_V256, EVEX_CD8<64, CD8VF>; 4469 4470 def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst), 4471 (ins i128mem:$src), 4472 "vmovntdqa\t{$src, $dst|$dst, $src}", 4473 [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.XMM.RM]>, 4474 EVEX, T8PD, EVEX_V128, EVEX_CD8<64, CD8VF>; 4475} 4476 4477multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, 4478 X86SchedWriteMoveLS Sched, 4479 PatFrag st_frag = alignednontemporalstore> { 4480 let SchedRW = [Sched.MR], AddedComplexity = 400 in 4481 def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src), 4482 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 4483 [(st_frag (_.VT _.RC:$src), addr:$dst)], 4484 _.ExeDomain>, EVEX, EVEX_CD8<_.EltSize, CD8VF>; 4485} 4486 4487multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr, 4488 AVX512VLVectorVTInfo VTInfo, 4489 X86SchedWriteMoveLSWidths Sched> { 4490 let Predicates = [HasAVX512] in 4491 defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512, Sched.ZMM>, EVEX_V512; 4492 4493 let Predicates = [HasAVX512, HasVLX] in { 4494 defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256, Sched.YMM>, EVEX_V256; 4495 defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128, Sched.XMM>, EVEX_V128; 4496 } 4497} 4498 4499defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info, 4500 SchedWriteVecMoveLSNT>, PD; 4501defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info, 4502 SchedWriteFMoveLSNT>, PD, VEX_W; 4503defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info, 4504 SchedWriteFMoveLSNT>, PS; 4505 4506let Predicates = [HasAVX512], AddedComplexity = 400 in { 4507 def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst), 4508 (VMOVNTDQZmr addr:$dst, VR512:$src)>; 4509 def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst), 4510 (VMOVNTDQZmr addr:$dst, VR512:$src)>; 4511 def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst), 4512 (VMOVNTDQZmr addr:$dst, VR512:$src)>; 4513 4514 def : Pat<(v8f64 (alignednontemporalload addr:$src)), 4515 (VMOVNTDQAZrm addr:$src)>; 4516 def : Pat<(v16f32 (alignednontemporalload addr:$src)), 4517 (VMOVNTDQAZrm addr:$src)>; 4518 def : Pat<(v8i64 (alignednontemporalload addr:$src)), 4519 (VMOVNTDQAZrm addr:$src)>; 4520 def : Pat<(v16i32 (alignednontemporalload addr:$src)), 4521 (VMOVNTDQAZrm addr:$src)>; 4522 def : Pat<(v32i16 (alignednontemporalload addr:$src)), 4523 (VMOVNTDQAZrm addr:$src)>; 4524 def : Pat<(v64i8 (alignednontemporalload addr:$src)), 4525 (VMOVNTDQAZrm addr:$src)>; 4526} 4527 4528let Predicates = [HasVLX], AddedComplexity = 400 in { 4529 def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst), 4530 (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>; 4531 def : Pat<(alignednontemporalstore (v16i16 VR256X:$src), addr:$dst), 4532 (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>; 4533 def : Pat<(alignednontemporalstore (v32i8 VR256X:$src), addr:$dst), 4534 (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>; 4535 4536 def : Pat<(v4f64 (alignednontemporalload addr:$src)), 4537 (VMOVNTDQAZ256rm addr:$src)>; 4538 def : Pat<(v8f32 (alignednontemporalload addr:$src)), 4539 (VMOVNTDQAZ256rm addr:$src)>; 4540 def : Pat<(v4i64 (alignednontemporalload addr:$src)), 4541 (VMOVNTDQAZ256rm addr:$src)>; 4542 def : Pat<(v8i32 (alignednontemporalload addr:$src)), 4543 (VMOVNTDQAZ256rm addr:$src)>; 4544 def : Pat<(v16i16 (alignednontemporalload addr:$src)), 4545 (VMOVNTDQAZ256rm addr:$src)>; 4546 def : Pat<(v32i8 (alignednontemporalload addr:$src)), 4547 (VMOVNTDQAZ256rm addr:$src)>; 4548 4549 def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst), 4550 (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>; 4551 def : Pat<(alignednontemporalstore (v8i16 VR128X:$src), addr:$dst), 4552 (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>; 4553 def : Pat<(alignednontemporalstore (v16i8 VR128X:$src), addr:$dst), 4554 (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>; 4555 4556 def : Pat<(v2f64 (alignednontemporalload addr:$src)), 4557 (VMOVNTDQAZ128rm addr:$src)>; 4558 def : Pat<(v4f32 (alignednontemporalload addr:$src)), 4559 (VMOVNTDQAZ128rm addr:$src)>; 4560 def : Pat<(v2i64 (alignednontemporalload addr:$src)), 4561 (VMOVNTDQAZ128rm addr:$src)>; 4562 def : Pat<(v4i32 (alignednontemporalload addr:$src)), 4563 (VMOVNTDQAZ128rm addr:$src)>; 4564 def : Pat<(v8i16 (alignednontemporalload addr:$src)), 4565 (VMOVNTDQAZ128rm addr:$src)>; 4566 def : Pat<(v16i8 (alignednontemporalload addr:$src)), 4567 (VMOVNTDQAZ128rm addr:$src)>; 4568} 4569 4570//===----------------------------------------------------------------------===// 4571// AVX-512 - Integer arithmetic 4572// 4573multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, 4574 X86VectorVTInfo _, X86FoldableSchedWrite sched, 4575 bit IsCommutable = 0> { 4576 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 4577 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 4578 "$src2, $src1", "$src1, $src2", 4579 (_.VT (OpNode _.RC:$src1, _.RC:$src2)), 4580 IsCommutable, IsCommutable>, AVX512BIBase, EVEX_4V, 4581 Sched<[sched]>; 4582 4583 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 4584 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr, 4585 "$src2, $src1", "$src1, $src2", 4586 (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2)))>, 4587 AVX512BIBase, EVEX_4V, 4588 Sched<[sched.Folded, sched.ReadAfterFold]>; 4589} 4590 4591multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode, 4592 X86VectorVTInfo _, X86FoldableSchedWrite sched, 4593 bit IsCommutable = 0> : 4594 avx512_binop_rm<opc, OpcodeStr, OpNode, _, sched, IsCommutable> { 4595 defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 4596 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr, 4597 "${src2}"#_.BroadcastStr#", $src1", 4598 "$src1, ${src2}"#_.BroadcastStr, 4599 (_.VT (OpNode _.RC:$src1, 4600 (_.BroadcastLdFrag addr:$src2)))>, 4601 AVX512BIBase, EVEX_4V, EVEX_B, 4602 Sched<[sched.Folded, sched.ReadAfterFold]>; 4603} 4604 4605multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode, 4606 AVX512VLVectorVTInfo VTInfo, 4607 X86SchedWriteWidths sched, Predicate prd, 4608 bit IsCommutable = 0> { 4609 let Predicates = [prd] in 4610 defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM, 4611 IsCommutable>, EVEX_V512; 4612 4613 let Predicates = [prd, HasVLX] in { 4614 defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256, 4615 sched.YMM, IsCommutable>, EVEX_V256; 4616 defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128, 4617 sched.XMM, IsCommutable>, EVEX_V128; 4618 } 4619} 4620 4621multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode, 4622 AVX512VLVectorVTInfo VTInfo, 4623 X86SchedWriteWidths sched, Predicate prd, 4624 bit IsCommutable = 0> { 4625 let Predicates = [prd] in 4626 defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM, 4627 IsCommutable>, EVEX_V512; 4628 4629 let Predicates = [prd, HasVLX] in { 4630 defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256, 4631 sched.YMM, IsCommutable>, EVEX_V256; 4632 defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128, 4633 sched.XMM, IsCommutable>, EVEX_V128; 4634 } 4635} 4636 4637multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode, 4638 X86SchedWriteWidths sched, Predicate prd, 4639 bit IsCommutable = 0> { 4640 defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info, 4641 sched, prd, IsCommutable>, 4642 VEX_W, EVEX_CD8<64, CD8VF>; 4643} 4644 4645multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode, 4646 X86SchedWriteWidths sched, Predicate prd, 4647 bit IsCommutable = 0> { 4648 defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info, 4649 sched, prd, IsCommutable>, EVEX_CD8<32, CD8VF>; 4650} 4651 4652multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode, 4653 X86SchedWriteWidths sched, Predicate prd, 4654 bit IsCommutable = 0> { 4655 defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info, 4656 sched, prd, IsCommutable>, EVEX_CD8<16, CD8VF>, 4657 VEX_WIG; 4658} 4659 4660multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode, 4661 X86SchedWriteWidths sched, Predicate prd, 4662 bit IsCommutable = 0> { 4663 defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info, 4664 sched, prd, IsCommutable>, EVEX_CD8<8, CD8VF>, 4665 VEX_WIG; 4666} 4667 4668multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr, 4669 SDNode OpNode, X86SchedWriteWidths sched, 4670 Predicate prd, bit IsCommutable = 0> { 4671 defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, sched, prd, 4672 IsCommutable>; 4673 4674 defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, sched, prd, 4675 IsCommutable>; 4676} 4677 4678multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr, 4679 SDNode OpNode, X86SchedWriteWidths sched, 4680 Predicate prd, bit IsCommutable = 0> { 4681 defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, sched, prd, 4682 IsCommutable>; 4683 4684 defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode, sched, prd, 4685 IsCommutable>; 4686} 4687 4688multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w, 4689 bits<8> opc_d, bits<8> opc_q, 4690 string OpcodeStr, SDNode OpNode, 4691 X86SchedWriteWidths sched, 4692 bit IsCommutable = 0> { 4693 defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, 4694 sched, HasAVX512, IsCommutable>, 4695 avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, 4696 sched, HasBWI, IsCommutable>; 4697} 4698 4699multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr, 4700 X86FoldableSchedWrite sched, 4701 SDNode OpNode,X86VectorVTInfo _Src, 4702 X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct, 4703 bit IsCommutable = 0> { 4704 defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst), 4705 (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr, 4706 "$src2, $src1","$src1, $src2", 4707 (_Dst.VT (OpNode 4708 (_Src.VT _Src.RC:$src1), 4709 (_Src.VT _Src.RC:$src2))), 4710 IsCommutable>, 4711 AVX512BIBase, EVEX_4V, Sched<[sched]>; 4712 defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst), 4713 (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr, 4714 "$src2, $src1", "$src1, $src2", 4715 (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), 4716 (_Src.LdFrag addr:$src2)))>, 4717 AVX512BIBase, EVEX_4V, 4718 Sched<[sched.Folded, sched.ReadAfterFold]>; 4719 4720 defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst), 4721 (ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2), 4722 OpcodeStr, 4723 "${src2}"#_Brdct.BroadcastStr#", $src1", 4724 "$src1, ${src2}"#_Brdct.BroadcastStr, 4725 (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert 4726 (_Brdct.VT (_Brdct.BroadcastLdFrag addr:$src2)))))>, 4727 AVX512BIBase, EVEX_4V, EVEX_B, 4728 Sched<[sched.Folded, sched.ReadAfterFold]>; 4729} 4730 4731defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add, 4732 SchedWriteVecALU, 1>; 4733defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub, 4734 SchedWriteVecALU, 0>; 4735defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", saddsat, 4736 SchedWriteVecALU, HasBWI, 1>; 4737defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", ssubsat, 4738 SchedWriteVecALU, HasBWI, 0>; 4739defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", uaddsat, 4740 SchedWriteVecALU, HasBWI, 1>; 4741defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", usubsat, 4742 SchedWriteVecALU, HasBWI, 0>; 4743defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul, 4744 SchedWritePMULLD, HasAVX512, 1>, T8PD; 4745defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul, 4746 SchedWriteVecIMul, HasBWI, 1>; 4747defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul, 4748 SchedWriteVecIMul, HasDQI, 1>, T8PD, 4749 NotEVEX2VEXConvertible; 4750defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SchedWriteVecIMul, 4751 HasBWI, 1>; 4752defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SchedWriteVecIMul, 4753 HasBWI, 1>; 4754defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs, 4755 SchedWriteVecIMul, HasBWI, 1>, T8PD; 4756defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", X86avg, 4757 SchedWriteVecALU, HasBWI, 1>; 4758defm VPMULDQ : avx512_binop_rm_vl_q<0x28, "vpmuldq", X86pmuldq, 4759 SchedWriteVecIMul, HasAVX512, 1>, T8PD; 4760defm VPMULUDQ : avx512_binop_rm_vl_q<0xF4, "vpmuludq", X86pmuludq, 4761 SchedWriteVecIMul, HasAVX512, 1>; 4762 4763multiclass avx512_binop_all<bits<8> opc, string OpcodeStr, 4764 X86SchedWriteWidths sched, 4765 AVX512VLVectorVTInfo _SrcVTInfo, 4766 AVX512VLVectorVTInfo _DstVTInfo, 4767 SDNode OpNode, Predicate prd, bit IsCommutable = 0> { 4768 let Predicates = [prd] in 4769 defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode, 4770 _SrcVTInfo.info512, _DstVTInfo.info512, 4771 v8i64_info, IsCommutable>, 4772 EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W; 4773 let Predicates = [HasVLX, prd] in { 4774 defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode, 4775 _SrcVTInfo.info256, _DstVTInfo.info256, 4776 v4i64x_info, IsCommutable>, 4777 EVEX_V256, EVEX_CD8<64, CD8VF>, VEX_W; 4778 defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode, 4779 _SrcVTInfo.info128, _DstVTInfo.info128, 4780 v2i64x_info, IsCommutable>, 4781 EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_W; 4782 } 4783} 4784 4785defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SchedWriteVecALU, 4786 avx512vl_i8_info, avx512vl_i8_info, 4787 X86multishift, HasVBMI, 0>, T8PD; 4788 4789multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode, 4790 X86VectorVTInfo _Src, X86VectorVTInfo _Dst, 4791 X86FoldableSchedWrite sched> { 4792 defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst), 4793 (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2), 4794 OpcodeStr, 4795 "${src2}"#_Src.BroadcastStr#", $src1", 4796 "$src1, ${src2}"#_Src.BroadcastStr, 4797 (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert 4798 (_Src.VT (_Src.BroadcastLdFrag addr:$src2)))))>, 4799 EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>, 4800 Sched<[sched.Folded, sched.ReadAfterFold]>; 4801} 4802 4803multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr, 4804 SDNode OpNode,X86VectorVTInfo _Src, 4805 X86VectorVTInfo _Dst, X86FoldableSchedWrite sched, 4806 bit IsCommutable = 0> { 4807 defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst), 4808 (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr, 4809 "$src2, $src1","$src1, $src2", 4810 (_Dst.VT (OpNode 4811 (_Src.VT _Src.RC:$src1), 4812 (_Src.VT _Src.RC:$src2))), 4813 IsCommutable, IsCommutable>, 4814 EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V, Sched<[sched]>; 4815 defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst), 4816 (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr, 4817 "$src2, $src1", "$src1, $src2", 4818 (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), 4819 (_Src.LdFrag addr:$src2)))>, 4820 EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>, 4821 Sched<[sched.Folded, sched.ReadAfterFold]>; 4822} 4823 4824multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr, 4825 SDNode OpNode> { 4826 let Predicates = [HasBWI] in 4827 defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i32_info, 4828 v32i16_info, SchedWriteShuffle.ZMM>, 4829 avx512_packs_rmb<opc, OpcodeStr, OpNode, v16i32_info, 4830 v32i16_info, SchedWriteShuffle.ZMM>, EVEX_V512; 4831 let Predicates = [HasBWI, HasVLX] in { 4832 defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i32x_info, 4833 v16i16x_info, SchedWriteShuffle.YMM>, 4834 avx512_packs_rmb<opc, OpcodeStr, OpNode, v8i32x_info, 4835 v16i16x_info, SchedWriteShuffle.YMM>, 4836 EVEX_V256; 4837 defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v4i32x_info, 4838 v8i16x_info, SchedWriteShuffle.XMM>, 4839 avx512_packs_rmb<opc, OpcodeStr, OpNode, v4i32x_info, 4840 v8i16x_info, SchedWriteShuffle.XMM>, 4841 EVEX_V128; 4842 } 4843} 4844multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr, 4845 SDNode OpNode> { 4846 let Predicates = [HasBWI] in 4847 defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info, v64i8_info, 4848 SchedWriteShuffle.ZMM>, EVEX_V512, VEX_WIG; 4849 let Predicates = [HasBWI, HasVLX] in { 4850 defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info, 4851 v32i8x_info, SchedWriteShuffle.YMM>, 4852 EVEX_V256, VEX_WIG; 4853 defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info, 4854 v16i8x_info, SchedWriteShuffle.XMM>, 4855 EVEX_V128, VEX_WIG; 4856 } 4857} 4858 4859multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr, 4860 SDNode OpNode, AVX512VLVectorVTInfo _Src, 4861 AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> { 4862 let Predicates = [HasBWI] in 4863 defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512, 4864 _Dst.info512, SchedWriteVecIMul.ZMM, 4865 IsCommutable>, EVEX_V512; 4866 let Predicates = [HasBWI, HasVLX] in { 4867 defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256, 4868 _Dst.info256, SchedWriteVecIMul.YMM, 4869 IsCommutable>, EVEX_V256; 4870 defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128, 4871 _Dst.info128, SchedWriteVecIMul.XMM, 4872 IsCommutable>, EVEX_V128; 4873 } 4874} 4875 4876defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, AVX512BIBase; 4877defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, AVX5128IBase; 4878defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase; 4879defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase; 4880 4881defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw, 4882 avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8PD, VEX_WIG; 4883defm VPMADDWD : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd, 4884 avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase, VEX_WIG; 4885 4886defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax, 4887 SchedWriteVecALU, HasBWI, 1>, T8PD; 4888defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax, 4889 SchedWriteVecALU, HasBWI, 1>; 4890defm VPMAXSD : avx512_binop_rm_vl_d<0x3D, "vpmaxsd", smax, 4891 SchedWriteVecALU, HasAVX512, 1>, T8PD; 4892defm VPMAXSQ : avx512_binop_rm_vl_q<0x3D, "vpmaxsq", smax, 4893 SchedWriteVecALU, HasAVX512, 1>, T8PD, 4894 NotEVEX2VEXConvertible; 4895 4896defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax, 4897 SchedWriteVecALU, HasBWI, 1>; 4898defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax, 4899 SchedWriteVecALU, HasBWI, 1>, T8PD; 4900defm VPMAXUD : avx512_binop_rm_vl_d<0x3F, "vpmaxud", umax, 4901 SchedWriteVecALU, HasAVX512, 1>, T8PD; 4902defm VPMAXUQ : avx512_binop_rm_vl_q<0x3F, "vpmaxuq", umax, 4903 SchedWriteVecALU, HasAVX512, 1>, T8PD, 4904 NotEVEX2VEXConvertible; 4905 4906defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin, 4907 SchedWriteVecALU, HasBWI, 1>, T8PD; 4908defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin, 4909 SchedWriteVecALU, HasBWI, 1>; 4910defm VPMINSD : avx512_binop_rm_vl_d<0x39, "vpminsd", smin, 4911 SchedWriteVecALU, HasAVX512, 1>, T8PD; 4912defm VPMINSQ : avx512_binop_rm_vl_q<0x39, "vpminsq", smin, 4913 SchedWriteVecALU, HasAVX512, 1>, T8PD, 4914 NotEVEX2VEXConvertible; 4915 4916defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin, 4917 SchedWriteVecALU, HasBWI, 1>; 4918defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin, 4919 SchedWriteVecALU, HasBWI, 1>, T8PD; 4920defm VPMINUD : avx512_binop_rm_vl_d<0x3B, "vpminud", umin, 4921 SchedWriteVecALU, HasAVX512, 1>, T8PD; 4922defm VPMINUQ : avx512_binop_rm_vl_q<0x3B, "vpminuq", umin, 4923 SchedWriteVecALU, HasAVX512, 1>, T8PD, 4924 NotEVEX2VEXConvertible; 4925 4926// PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX. 4927let Predicates = [HasDQI, NoVLX] in { 4928 def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))), 4929 (EXTRACT_SUBREG 4930 (VPMULLQZrr 4931 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm), 4932 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)), 4933 sub_ymm)>; 4934 def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))), 4935 (EXTRACT_SUBREG 4936 (VPMULLQZrmb 4937 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm), 4938 addr:$src2), 4939 sub_ymm)>; 4940 4941 def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))), 4942 (EXTRACT_SUBREG 4943 (VPMULLQZrr 4944 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm), 4945 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)), 4946 sub_xmm)>; 4947 def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))), 4948 (EXTRACT_SUBREG 4949 (VPMULLQZrmb 4950 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm), 4951 addr:$src2), 4952 sub_xmm)>; 4953} 4954 4955multiclass avx512_min_max_lowering<string Instr, SDNode OpNode> { 4956 def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)), 4957 (EXTRACT_SUBREG 4958 (!cast<Instruction>(Instr#"rr") 4959 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm), 4960 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)), 4961 sub_ymm)>; 4962 def : Pat<(v4i64 (OpNode (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))), 4963 (EXTRACT_SUBREG 4964 (!cast<Instruction>(Instr#"rmb") 4965 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm), 4966 addr:$src2), 4967 sub_ymm)>; 4968 4969 def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)), 4970 (EXTRACT_SUBREG 4971 (!cast<Instruction>(Instr#"rr") 4972 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm), 4973 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)), 4974 sub_xmm)>; 4975 def : Pat<(v2i64 (OpNode (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))), 4976 (EXTRACT_SUBREG 4977 (!cast<Instruction>(Instr#"rmb") 4978 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm), 4979 addr:$src2), 4980 sub_xmm)>; 4981} 4982 4983let Predicates = [HasAVX512, NoVLX] in { 4984 defm : avx512_min_max_lowering<"VPMAXUQZ", umax>; 4985 defm : avx512_min_max_lowering<"VPMINUQZ", umin>; 4986 defm : avx512_min_max_lowering<"VPMAXSQZ", smax>; 4987 defm : avx512_min_max_lowering<"VPMINSQZ", smin>; 4988} 4989 4990//===----------------------------------------------------------------------===// 4991// AVX-512 Logical Instructions 4992//===----------------------------------------------------------------------===// 4993 4994defm VPAND : avx512_binop_rm_vl_dq<0xDB, 0xDB, "vpand", and, 4995 SchedWriteVecLogic, HasAVX512, 1>; 4996defm VPOR : avx512_binop_rm_vl_dq<0xEB, 0xEB, "vpor", or, 4997 SchedWriteVecLogic, HasAVX512, 1>; 4998defm VPXOR : avx512_binop_rm_vl_dq<0xEF, 0xEF, "vpxor", xor, 4999 SchedWriteVecLogic, HasAVX512, 1>; 5000defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp, 5001 SchedWriteVecLogic, HasAVX512>; 5002 5003let Predicates = [HasVLX] in { 5004 def : Pat<(v16i8 (and VR128X:$src1, VR128X:$src2)), 5005 (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>; 5006 def : Pat<(v8i16 (and VR128X:$src1, VR128X:$src2)), 5007 (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>; 5008 5009 def : Pat<(v16i8 (or VR128X:$src1, VR128X:$src2)), 5010 (VPORQZ128rr VR128X:$src1, VR128X:$src2)>; 5011 def : Pat<(v8i16 (or VR128X:$src1, VR128X:$src2)), 5012 (VPORQZ128rr VR128X:$src1, VR128X:$src2)>; 5013 5014 def : Pat<(v16i8 (xor VR128X:$src1, VR128X:$src2)), 5015 (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>; 5016 def : Pat<(v8i16 (xor VR128X:$src1, VR128X:$src2)), 5017 (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>; 5018 5019 def : Pat<(v16i8 (X86andnp VR128X:$src1, VR128X:$src2)), 5020 (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>; 5021 def : Pat<(v8i16 (X86andnp VR128X:$src1, VR128X:$src2)), 5022 (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>; 5023 5024 def : Pat<(and VR128X:$src1, (loadv16i8 addr:$src2)), 5025 (VPANDQZ128rm VR128X:$src1, addr:$src2)>; 5026 def : Pat<(and VR128X:$src1, (loadv8i16 addr:$src2)), 5027 (VPANDQZ128rm VR128X:$src1, addr:$src2)>; 5028 5029 def : Pat<(or VR128X:$src1, (loadv16i8 addr:$src2)), 5030 (VPORQZ128rm VR128X:$src1, addr:$src2)>; 5031 def : Pat<(or VR128X:$src1, (loadv8i16 addr:$src2)), 5032 (VPORQZ128rm VR128X:$src1, addr:$src2)>; 5033 5034 def : Pat<(xor VR128X:$src1, (loadv16i8 addr:$src2)), 5035 (VPXORQZ128rm VR128X:$src1, addr:$src2)>; 5036 def : Pat<(xor VR128X:$src1, (loadv8i16 addr:$src2)), 5037 (VPXORQZ128rm VR128X:$src1, addr:$src2)>; 5038 5039 def : Pat<(X86andnp VR128X:$src1, (loadv16i8 addr:$src2)), 5040 (VPANDNQZ128rm VR128X:$src1, addr:$src2)>; 5041 def : Pat<(X86andnp VR128X:$src1, (loadv8i16 addr:$src2)), 5042 (VPANDNQZ128rm VR128X:$src1, addr:$src2)>; 5043 5044 def : Pat<(v32i8 (and VR256X:$src1, VR256X:$src2)), 5045 (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>; 5046 def : Pat<(v16i16 (and VR256X:$src1, VR256X:$src2)), 5047 (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>; 5048 5049 def : Pat<(v32i8 (or VR256X:$src1, VR256X:$src2)), 5050 (VPORQZ256rr VR256X:$src1, VR256X:$src2)>; 5051 def : Pat<(v16i16 (or VR256X:$src1, VR256X:$src2)), 5052 (VPORQZ256rr VR256X:$src1, VR256X:$src2)>; 5053 5054 def : Pat<(v32i8 (xor VR256X:$src1, VR256X:$src2)), 5055 (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>; 5056 def : Pat<(v16i16 (xor VR256X:$src1, VR256X:$src2)), 5057 (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>; 5058 5059 def : Pat<(v32i8 (X86andnp VR256X:$src1, VR256X:$src2)), 5060 (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>; 5061 def : Pat<(v16i16 (X86andnp VR256X:$src1, VR256X:$src2)), 5062 (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>; 5063 5064 def : Pat<(and VR256X:$src1, (loadv32i8 addr:$src2)), 5065 (VPANDQZ256rm VR256X:$src1, addr:$src2)>; 5066 def : Pat<(and VR256X:$src1, (loadv16i16 addr:$src2)), 5067 (VPANDQZ256rm VR256X:$src1, addr:$src2)>; 5068 5069 def : Pat<(or VR256X:$src1, (loadv32i8 addr:$src2)), 5070 (VPORQZ256rm VR256X:$src1, addr:$src2)>; 5071 def : Pat<(or VR256X:$src1, (loadv16i16 addr:$src2)), 5072 (VPORQZ256rm VR256X:$src1, addr:$src2)>; 5073 5074 def : Pat<(xor VR256X:$src1, (loadv32i8 addr:$src2)), 5075 (VPXORQZ256rm VR256X:$src1, addr:$src2)>; 5076 def : Pat<(xor VR256X:$src1, (loadv16i16 addr:$src2)), 5077 (VPXORQZ256rm VR256X:$src1, addr:$src2)>; 5078 5079 def : Pat<(X86andnp VR256X:$src1, (loadv32i8 addr:$src2)), 5080 (VPANDNQZ256rm VR256X:$src1, addr:$src2)>; 5081 def : Pat<(X86andnp VR256X:$src1, (loadv16i16 addr:$src2)), 5082 (VPANDNQZ256rm VR256X:$src1, addr:$src2)>; 5083} 5084 5085let Predicates = [HasAVX512] in { 5086 def : Pat<(v64i8 (and VR512:$src1, VR512:$src2)), 5087 (VPANDQZrr VR512:$src1, VR512:$src2)>; 5088 def : Pat<(v32i16 (and VR512:$src1, VR512:$src2)), 5089 (VPANDQZrr VR512:$src1, VR512:$src2)>; 5090 5091 def : Pat<(v64i8 (or VR512:$src1, VR512:$src2)), 5092 (VPORQZrr VR512:$src1, VR512:$src2)>; 5093 def : Pat<(v32i16 (or VR512:$src1, VR512:$src2)), 5094 (VPORQZrr VR512:$src1, VR512:$src2)>; 5095 5096 def : Pat<(v64i8 (xor VR512:$src1, VR512:$src2)), 5097 (VPXORQZrr VR512:$src1, VR512:$src2)>; 5098 def : Pat<(v32i16 (xor VR512:$src1, VR512:$src2)), 5099 (VPXORQZrr VR512:$src1, VR512:$src2)>; 5100 5101 def : Pat<(v64i8 (X86andnp VR512:$src1, VR512:$src2)), 5102 (VPANDNQZrr VR512:$src1, VR512:$src2)>; 5103 def : Pat<(v32i16 (X86andnp VR512:$src1, VR512:$src2)), 5104 (VPANDNQZrr VR512:$src1, VR512:$src2)>; 5105 5106 def : Pat<(and VR512:$src1, (loadv64i8 addr:$src2)), 5107 (VPANDQZrm VR512:$src1, addr:$src2)>; 5108 def : Pat<(and VR512:$src1, (loadv32i16 addr:$src2)), 5109 (VPANDQZrm VR512:$src1, addr:$src2)>; 5110 5111 def : Pat<(or VR512:$src1, (loadv64i8 addr:$src2)), 5112 (VPORQZrm VR512:$src1, addr:$src2)>; 5113 def : Pat<(or VR512:$src1, (loadv32i16 addr:$src2)), 5114 (VPORQZrm VR512:$src1, addr:$src2)>; 5115 5116 def : Pat<(xor VR512:$src1, (loadv64i8 addr:$src2)), 5117 (VPXORQZrm VR512:$src1, addr:$src2)>; 5118 def : Pat<(xor VR512:$src1, (loadv32i16 addr:$src2)), 5119 (VPXORQZrm VR512:$src1, addr:$src2)>; 5120 5121 def : Pat<(X86andnp VR512:$src1, (loadv64i8 addr:$src2)), 5122 (VPANDNQZrm VR512:$src1, addr:$src2)>; 5123 def : Pat<(X86andnp VR512:$src1, (loadv32i16 addr:$src2)), 5124 (VPANDNQZrm VR512:$src1, addr:$src2)>; 5125} 5126 5127// Patterns to catch vselect with different type than logic op. 5128multiclass avx512_logical_lowering<string InstrStr, SDNode OpNode, 5129 X86VectorVTInfo _, 5130 X86VectorVTInfo IntInfo> { 5131 // Masked register-register logical operations. 5132 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 5133 (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))), 5134 _.RC:$src0)), 5135 (!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask, 5136 _.RC:$src1, _.RC:$src2)>; 5137 5138 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 5139 (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))), 5140 _.ImmAllZerosV)), 5141 (!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1, 5142 _.RC:$src2)>; 5143 5144 // Masked register-memory logical operations. 5145 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 5146 (bitconvert (IntInfo.VT (OpNode _.RC:$src1, 5147 (load addr:$src2)))), 5148 _.RC:$src0)), 5149 (!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask, 5150 _.RC:$src1, addr:$src2)>; 5151 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 5152 (bitconvert (IntInfo.VT (OpNode _.RC:$src1, 5153 (load addr:$src2)))), 5154 _.ImmAllZerosV)), 5155 (!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1, 5156 addr:$src2)>; 5157} 5158 5159multiclass avx512_logical_lowering_bcast<string InstrStr, SDNode OpNode, 5160 X86VectorVTInfo _, 5161 X86VectorVTInfo IntInfo> { 5162 // Register-broadcast logical operations. 5163 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 5164 (bitconvert 5165 (IntInfo.VT (OpNode _.RC:$src1, 5166 (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))), 5167 _.RC:$src0)), 5168 (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask, 5169 _.RC:$src1, addr:$src2)>; 5170 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 5171 (bitconvert 5172 (IntInfo.VT (OpNode _.RC:$src1, 5173 (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))), 5174 _.ImmAllZerosV)), 5175 (!cast<Instruction>(InstrStr#rmbkz) _.KRCWM:$mask, 5176 _.RC:$src1, addr:$src2)>; 5177} 5178 5179multiclass avx512_logical_lowering_sizes<string InstrStr, SDNode OpNode, 5180 AVX512VLVectorVTInfo SelectInfo, 5181 AVX512VLVectorVTInfo IntInfo> { 5182let Predicates = [HasVLX] in { 5183 defm : avx512_logical_lowering<InstrStr#"Z128", OpNode, SelectInfo.info128, 5184 IntInfo.info128>; 5185 defm : avx512_logical_lowering<InstrStr#"Z256", OpNode, SelectInfo.info256, 5186 IntInfo.info256>; 5187} 5188let Predicates = [HasAVX512] in { 5189 defm : avx512_logical_lowering<InstrStr#"Z", OpNode, SelectInfo.info512, 5190 IntInfo.info512>; 5191} 5192} 5193 5194multiclass avx512_logical_lowering_sizes_bcast<string InstrStr, SDNode OpNode, 5195 AVX512VLVectorVTInfo SelectInfo, 5196 AVX512VLVectorVTInfo IntInfo> { 5197let Predicates = [HasVLX] in { 5198 defm : avx512_logical_lowering_bcast<InstrStr#"Z128", OpNode, 5199 SelectInfo.info128, IntInfo.info128>; 5200 defm : avx512_logical_lowering_bcast<InstrStr#"Z256", OpNode, 5201 SelectInfo.info256, IntInfo.info256>; 5202} 5203let Predicates = [HasAVX512] in { 5204 defm : avx512_logical_lowering_bcast<InstrStr#"Z", OpNode, 5205 SelectInfo.info512, IntInfo.info512>; 5206} 5207} 5208 5209multiclass avx512_logical_lowering_types<string InstrStr, SDNode OpNode> { 5210 // i64 vselect with i32/i16/i8 logic op 5211 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info, 5212 avx512vl_i32_info>; 5213 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info, 5214 avx512vl_i16_info>; 5215 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info, 5216 avx512vl_i8_info>; 5217 5218 // i32 vselect with i64/i16/i8 logic op 5219 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info, 5220 avx512vl_i64_info>; 5221 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info, 5222 avx512vl_i16_info>; 5223 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info, 5224 avx512vl_i8_info>; 5225 5226 // f32 vselect with i64/i32/i16/i8 logic op 5227 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info, 5228 avx512vl_i64_info>; 5229 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info, 5230 avx512vl_i32_info>; 5231 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info, 5232 avx512vl_i16_info>; 5233 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info, 5234 avx512vl_i8_info>; 5235 5236 // f64 vselect with i64/i32/i16/i8 logic op 5237 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info, 5238 avx512vl_i64_info>; 5239 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info, 5240 avx512vl_i32_info>; 5241 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info, 5242 avx512vl_i16_info>; 5243 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info, 5244 avx512vl_i8_info>; 5245 5246 defm : avx512_logical_lowering_sizes_bcast<InstrStr#"D", OpNode, 5247 avx512vl_f32_info, 5248 avx512vl_i32_info>; 5249 defm : avx512_logical_lowering_sizes_bcast<InstrStr#"Q", OpNode, 5250 avx512vl_f64_info, 5251 avx512vl_i64_info>; 5252} 5253 5254defm : avx512_logical_lowering_types<"VPAND", and>; 5255defm : avx512_logical_lowering_types<"VPOR", or>; 5256defm : avx512_logical_lowering_types<"VPXOR", xor>; 5257defm : avx512_logical_lowering_types<"VPANDN", X86andnp>; 5258 5259//===----------------------------------------------------------------------===// 5260// AVX-512 FP arithmetic 5261//===----------------------------------------------------------------------===// 5262 5263multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _, 5264 SDPatternOperator OpNode, SDNode VecNode, 5265 X86FoldableSchedWrite sched, bit IsCommutable> { 5266 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in { 5267 defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 5268 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 5269 "$src2, $src1", "$src1, $src2", 5270 (_.VT (VecNode _.RC:$src1, _.RC:$src2))>, 5271 Sched<[sched]>; 5272 5273 defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), 5274 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr, 5275 "$src2, $src1", "$src1, $src2", 5276 (_.VT (VecNode _.RC:$src1, 5277 (_.ScalarIntMemFrags addr:$src2)))>, 5278 Sched<[sched.Folded, sched.ReadAfterFold]>; 5279 let isCodeGenOnly = 1, Predicates = [HasAVX512] in { 5280 def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst), 5281 (ins _.FRC:$src1, _.FRC:$src2), 5282 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", 5283 [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>, 5284 Sched<[sched]> { 5285 let isCommutable = IsCommutable; 5286 } 5287 def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst), 5288 (ins _.FRC:$src1, _.ScalarMemOp:$src2), 5289 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", 5290 [(set _.FRC:$dst, (OpNode _.FRC:$src1, 5291 (_.ScalarLdFrag addr:$src2)))]>, 5292 Sched<[sched.Folded, sched.ReadAfterFold]>; 5293 } 5294 } 5295} 5296 5297multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo _, 5298 SDNode VecNode, X86FoldableSchedWrite sched, 5299 bit IsCommutable = 0> { 5300 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in 5301 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 5302 (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr, 5303 "$rc, $src2, $src1", "$src1, $src2, $rc", 5304 (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), 5305 (i32 timm:$rc))>, 5306 EVEX_B, EVEX_RC, Sched<[sched]>; 5307} 5308multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _, 5309 SDNode OpNode, SDNode VecNode, SDNode SaeNode, 5310 X86FoldableSchedWrite sched, bit IsCommutable, 5311 string EVEX2VexOvrd> { 5312 let ExeDomain = _.ExeDomain in { 5313 defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 5314 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 5315 "$src2, $src1", "$src1, $src2", 5316 (_.VT (VecNode _.RC:$src1, _.RC:$src2))>, 5317 Sched<[sched]>, SIMD_EXC; 5318 5319 defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), 5320 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr, 5321 "$src2, $src1", "$src1, $src2", 5322 (_.VT (VecNode _.RC:$src1, 5323 (_.ScalarIntMemFrags addr:$src2)))>, 5324 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; 5325 5326 let isCodeGenOnly = 1, Predicates = [HasAVX512], 5327 Uses = [MXCSR], mayRaiseFPException = 1 in { 5328 def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst), 5329 (ins _.FRC:$src1, _.FRC:$src2), 5330 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", 5331 [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>, 5332 Sched<[sched]>, 5333 EVEX2VEXOverride<EVEX2VexOvrd#"rr"> { 5334 let isCommutable = IsCommutable; 5335 } 5336 def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst), 5337 (ins _.FRC:$src1, _.ScalarMemOp:$src2), 5338 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", 5339 [(set _.FRC:$dst, (OpNode _.FRC:$src1, 5340 (_.ScalarLdFrag addr:$src2)))]>, 5341 Sched<[sched.Folded, sched.ReadAfterFold]>, 5342 EVEX2VEXOverride<EVEX2VexOvrd#"rm">; 5343 } 5344 5345 let Uses = [MXCSR] in 5346 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 5347 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 5348 "{sae}, $src2, $src1", "$src1, $src2, {sae}", 5349 (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>, 5350 EVEX_B, Sched<[sched]>; 5351 } 5352} 5353 5354multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 5355 SDNode VecNode, SDNode RndNode, 5356 X86SchedWriteSizes sched, bit IsCommutable> { 5357 defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode, 5358 sched.PS.Scl, IsCommutable>, 5359 avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, RndNode, 5360 sched.PS.Scl, IsCommutable>, 5361 XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>; 5362 defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode, 5363 sched.PD.Scl, IsCommutable>, 5364 avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, RndNode, 5365 sched.PD.Scl, IsCommutable>, 5366 XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>; 5367} 5368 5369multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode, 5370 SDNode VecNode, SDNode SaeNode, 5371 X86SchedWriteSizes sched, bit IsCommutable> { 5372 defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode, 5373 VecNode, SaeNode, sched.PS.Scl, IsCommutable, 5374 NAME#"SS">, 5375 XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>; 5376 defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode, 5377 VecNode, SaeNode, sched.PD.Scl, IsCommutable, 5378 NAME#"SD">, 5379 XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>; 5380} 5381defm VADD : avx512_binop_s_round<0x58, "vadd", any_fadd, X86fadds, X86faddRnds, 5382 SchedWriteFAddSizes, 1>; 5383defm VMUL : avx512_binop_s_round<0x59, "vmul", any_fmul, X86fmuls, X86fmulRnds, 5384 SchedWriteFMulSizes, 1>; 5385defm VSUB : avx512_binop_s_round<0x5C, "vsub", any_fsub, X86fsubs, X86fsubRnds, 5386 SchedWriteFAddSizes, 0>; 5387defm VDIV : avx512_binop_s_round<0x5E, "vdiv", any_fdiv, X86fdivs, X86fdivRnds, 5388 SchedWriteFDivSizes, 0>; 5389defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminSAEs, 5390 SchedWriteFCmpSizes, 0>; 5391defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxSAEs, 5392 SchedWriteFCmpSizes, 0>; 5393 5394// MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use 5395// X86fminc and X86fmaxc instead of X86fmin and X86fmax 5396multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr, 5397 X86VectorVTInfo _, SDNode OpNode, 5398 X86FoldableSchedWrite sched, 5399 string EVEX2VEXOvrd> { 5400 let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in { 5401 def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst), 5402 (ins _.FRC:$src1, _.FRC:$src2), 5403 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", 5404 [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>, 5405 Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr"> { 5406 let isCommutable = 1; 5407 } 5408 def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst), 5409 (ins _.FRC:$src1, _.ScalarMemOp:$src2), 5410 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", 5411 [(set _.FRC:$dst, (OpNode _.FRC:$src1, 5412 (_.ScalarLdFrag addr:$src2)))]>, 5413 Sched<[sched.Folded, sched.ReadAfterFold]>, 5414 EVEX2VEXOverride<EVEX2VEXOvrd#"rm">; 5415 } 5416} 5417defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc, 5418 SchedWriteFCmp.Scl, "VMINCSS">, XS, 5419 EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC; 5420 5421defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc, 5422 SchedWriteFCmp.Scl, "VMINCSD">, XD, 5423 VEX_W, EVEX_4V, VEX_LIG, 5424 EVEX_CD8<64, CD8VT1>, SIMD_EXC; 5425 5426defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc, 5427 SchedWriteFCmp.Scl, "VMAXCSS">, XS, 5428 EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC; 5429 5430defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc, 5431 SchedWriteFCmp.Scl, "VMAXCSD">, XD, 5432 VEX_W, EVEX_4V, VEX_LIG, 5433 EVEX_CD8<64, CD8VT1>, SIMD_EXC; 5434 5435multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 5436 SDPatternOperator MaskOpNode, 5437 X86VectorVTInfo _, X86FoldableSchedWrite sched, 5438 bit IsCommutable, 5439 bit IsKCommutable = IsCommutable> { 5440 let ExeDomain = _.ExeDomain, hasSideEffects = 0, 5441 Uses = [MXCSR], mayRaiseFPException = 1 in { 5442 defm rr: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst), 5443 (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix, 5444 "$src2, $src1", "$src1, $src2", 5445 (_.VT (OpNode _.RC:$src1, _.RC:$src2)), 5446 (_.VT (MaskOpNode _.RC:$src1, _.RC:$src2)), IsCommutable, 5447 IsKCommutable, IsKCommutable>, 5448 EVEX_4V, Sched<[sched]>; 5449 let mayLoad = 1 in { 5450 defm rm: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst), 5451 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#_.Suffix, 5452 "$src2, $src1", "$src1, $src2", 5453 (OpNode _.RC:$src1, (_.LdFrag addr:$src2)), 5454 (MaskOpNode _.RC:$src1, (_.LdFrag addr:$src2))>, 5455 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>; 5456 defm rmb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst), 5457 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#_.Suffix, 5458 "${src2}"#_.BroadcastStr#", $src1", 5459 "$src1, ${src2}"#_.BroadcastStr, 5460 (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))), 5461 (MaskOpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>, 5462 EVEX_4V, EVEX_B, 5463 Sched<[sched.Folded, sched.ReadAfterFold]>; 5464 } 5465 } 5466} 5467 5468multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr, 5469 SDPatternOperator OpNodeRnd, 5470 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 5471 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in 5472 defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 5473 (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr#_.Suffix, 5474 "$rc, $src2, $src1", "$src1, $src2, $rc", 5475 (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 timm:$rc)))>, 5476 EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>; 5477} 5478 5479multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr, 5480 SDPatternOperator OpNodeSAE, 5481 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 5482 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in 5483 defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 5484 (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix, 5485 "{sae}, $src2, $src1", "$src1, $src2, {sae}", 5486 (_.VT (OpNodeSAE _.RC:$src1, _.RC:$src2))>, 5487 EVEX_4V, EVEX_B, Sched<[sched]>; 5488} 5489 5490multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 5491 SDPatternOperator MaskOpNode, 5492 Predicate prd, X86SchedWriteSizes sched, 5493 bit IsCommutable = 0, 5494 bit IsPD128Commutable = IsCommutable> { 5495 let Predicates = [prd] in { 5496 defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f32_info, 5497 sched.PS.ZMM, IsCommutable>, EVEX_V512, PS, 5498 EVEX_CD8<32, CD8VF>; 5499 defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f64_info, 5500 sched.PD.ZMM, IsCommutable>, EVEX_V512, PD, VEX_W, 5501 EVEX_CD8<64, CD8VF>; 5502 } 5503 5504 // Define only if AVX512VL feature is present. 5505 let Predicates = [prd, HasVLX] in { 5506 defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f32x_info, 5507 sched.PS.XMM, IsCommutable>, EVEX_V128, PS, 5508 EVEX_CD8<32, CD8VF>; 5509 defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f32x_info, 5510 sched.PS.YMM, IsCommutable>, EVEX_V256, PS, 5511 EVEX_CD8<32, CD8VF>; 5512 defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v2f64x_info, 5513 sched.PD.XMM, IsPD128Commutable, 5514 IsCommutable>, EVEX_V128, PD, VEX_W, 5515 EVEX_CD8<64, CD8VF>; 5516 defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f64x_info, 5517 sched.PD.YMM, IsCommutable>, EVEX_V256, PD, VEX_W, 5518 EVEX_CD8<64, CD8VF>; 5519 } 5520} 5521 5522let Uses = [MXCSR] in 5523multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd, 5524 X86SchedWriteSizes sched> { 5525 defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM, 5526 v16f32_info>, 5527 EVEX_V512, PS, EVEX_CD8<32, CD8VF>; 5528 defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM, 5529 v8f64_info>, 5530 EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>; 5531} 5532 5533let Uses = [MXCSR] in 5534multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd, 5535 X86SchedWriteSizes sched> { 5536 defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM, 5537 v16f32_info>, 5538 EVEX_V512, PS, EVEX_CD8<32, CD8VF>; 5539 defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM, 5540 v8f64_info>, 5541 EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>; 5542} 5543 5544defm VADD : avx512_fp_binop_p<0x58, "vadd", any_fadd, fadd, HasAVX512, 5545 SchedWriteFAddSizes, 1>, 5546 avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>; 5547defm VMUL : avx512_fp_binop_p<0x59, "vmul", any_fmul, fmul, HasAVX512, 5548 SchedWriteFMulSizes, 1>, 5549 avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>; 5550defm VSUB : avx512_fp_binop_p<0x5C, "vsub", any_fsub, fsub, HasAVX512, 5551 SchedWriteFAddSizes>, 5552 avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>; 5553defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", any_fdiv, fdiv, HasAVX512, 5554 SchedWriteFDivSizes>, 5555 avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>; 5556defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, X86fmin, HasAVX512, 5557 SchedWriteFCmpSizes, 0>, 5558 avx512_fp_binop_p_sae<0x5D, "vmin", X86fminSAE, SchedWriteFCmpSizes>; 5559defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, X86fmax, HasAVX512, 5560 SchedWriteFCmpSizes, 0>, 5561 avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxSAE, SchedWriteFCmpSizes>; 5562let isCodeGenOnly = 1 in { 5563 defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, X86fminc, HasAVX512, 5564 SchedWriteFCmpSizes, 1>; 5565 defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, X86fmaxc, HasAVX512, 5566 SchedWriteFCmpSizes, 1>; 5567} 5568let Uses = []<Register>, mayRaiseFPException = 0 in { 5569defm VAND : avx512_fp_binop_p<0x54, "vand", null_frag, null_frag, HasDQI, 5570 SchedWriteFLogicSizes, 1>; 5571defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, null_frag, HasDQI, 5572 SchedWriteFLogicSizes, 0>; 5573defm VOR : avx512_fp_binop_p<0x56, "vor", null_frag, null_frag, HasDQI, 5574 SchedWriteFLogicSizes, 1>; 5575defm VXOR : avx512_fp_binop_p<0x57, "vxor", null_frag, null_frag, HasDQI, 5576 SchedWriteFLogicSizes, 1>; 5577} 5578 5579multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode, 5580 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 5581 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in { 5582 defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 5583 (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix, 5584 "$src2, $src1", "$src1, $src2", 5585 (_.VT (OpNode _.RC:$src1, _.RC:$src2))>, 5586 EVEX_4V, Sched<[sched]>; 5587 defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 5588 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#_.Suffix, 5589 "$src2, $src1", "$src1, $src2", 5590 (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>, 5591 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>; 5592 defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 5593 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#_.Suffix, 5594 "${src2}"#_.BroadcastStr#", $src1", 5595 "$src1, ${src2}"#_.BroadcastStr, 5596 (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>, 5597 EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 5598 } 5599} 5600 5601multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode, 5602 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 5603 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in { 5604 defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 5605 (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix, 5606 "$src2, $src1", "$src1, $src2", 5607 (_.VT (OpNode _.RC:$src1, _.RC:$src2))>, 5608 Sched<[sched]>; 5609 defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), 5610 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr#_.Suffix, 5611 "$src2, $src1", "$src1, $src2", 5612 (OpNode _.RC:$src1, (_.ScalarIntMemFrags addr:$src2))>, 5613 Sched<[sched.Folded, sched.ReadAfterFold]>; 5614 } 5615} 5616 5617multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr, 5618 X86SchedWriteWidths sched> { 5619 defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v16f32_info>, 5620 avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v16f32_info>, 5621 EVEX_V512, EVEX_CD8<32, CD8VF>; 5622 defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v8f64_info>, 5623 avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v8f64_info>, 5624 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; 5625 defm SSZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f32x_info>, 5626 avx512_fp_scalar_round<opcScaler, OpcodeStr#"ss", f32x_info, 5627 X86scalefsRnd, sched.Scl>, 5628 EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>; 5629 defm SDZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f64x_info>, 5630 avx512_fp_scalar_round<opcScaler, OpcodeStr#"sd", f64x_info, 5631 X86scalefsRnd, sched.Scl>, 5632 EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>, VEX_W; 5633 5634 // Define only if AVX512VL feature is present. 5635 let Predicates = [HasVLX] in { 5636 defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v4f32x_info>, 5637 EVEX_V128, EVEX_CD8<32, CD8VF>; 5638 defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v8f32x_info>, 5639 EVEX_V256, EVEX_CD8<32, CD8VF>; 5640 defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v2f64x_info>, 5641 EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>; 5642 defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v4f64x_info>, 5643 EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>; 5644 } 5645} 5646defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", 5647 SchedWriteFAdd>, T8PD, NotEVEX2VEXConvertible; 5648 5649//===----------------------------------------------------------------------===// 5650// AVX-512 VPTESTM instructions 5651//===----------------------------------------------------------------------===// 5652 5653multiclass avx512_vptest<bits<8> opc, string OpcodeStr, 5654 X86FoldableSchedWrite sched, X86VectorVTInfo _, 5655 string Name> { 5656 // NOTE: Patterns are omitted in favor of manual selection in X86ISelDAGToDAG. 5657 // There are just too many permutations due to commutability and bitcasts. 5658 let ExeDomain = _.ExeDomain, hasSideEffects = 0 in { 5659 defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst), 5660 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 5661 "$src2, $src1", "$src1, $src2", 5662 (null_frag), (null_frag), 1>, 5663 EVEX_4V, Sched<[sched]>; 5664 let mayLoad = 1 in 5665 defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst), 5666 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr, 5667 "$src2, $src1", "$src1, $src2", 5668 (null_frag), (null_frag)>, 5669 EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, 5670 Sched<[sched.Folded, sched.ReadAfterFold]>; 5671 } 5672} 5673 5674multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr, 5675 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 5676 let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in 5677 defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst), 5678 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr, 5679 "${src2}"#_.BroadcastStr#", $src1", 5680 "$src1, ${src2}"#_.BroadcastStr, 5681 (null_frag), (null_frag)>, 5682 EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, 5683 Sched<[sched.Folded, sched.ReadAfterFold]>; 5684} 5685 5686multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr, 5687 X86SchedWriteWidths sched, 5688 AVX512VLVectorVTInfo _> { 5689 let Predicates = [HasAVX512] in 5690 defm Z : avx512_vptest<opc, OpcodeStr, sched.ZMM, _.info512, NAME>, 5691 avx512_vptest_mb<opc, OpcodeStr, sched.ZMM, _.info512>, EVEX_V512; 5692 5693 let Predicates = [HasAVX512, HasVLX] in { 5694 defm Z256 : avx512_vptest<opc, OpcodeStr, sched.YMM, _.info256, NAME>, 5695 avx512_vptest_mb<opc, OpcodeStr, sched.YMM, _.info256>, EVEX_V256; 5696 defm Z128 : avx512_vptest<opc, OpcodeStr, sched.XMM, _.info128, NAME>, 5697 avx512_vptest_mb<opc, OpcodeStr, sched.XMM, _.info128>, EVEX_V128; 5698 } 5699} 5700 5701multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr, 5702 X86SchedWriteWidths sched> { 5703 defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", sched, 5704 avx512vl_i32_info>; 5705 defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", sched, 5706 avx512vl_i64_info>, VEX_W; 5707} 5708 5709multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr, 5710 X86SchedWriteWidths sched> { 5711 let Predicates = [HasBWI] in { 5712 defm WZ: avx512_vptest<opc, OpcodeStr#"w", sched.ZMM, 5713 v32i16_info, NAME#"W">, EVEX_V512, VEX_W; 5714 defm BZ: avx512_vptest<opc, OpcodeStr#"b", sched.ZMM, 5715 v64i8_info, NAME#"B">, EVEX_V512; 5716 } 5717 let Predicates = [HasVLX, HasBWI] in { 5718 5719 defm WZ256: avx512_vptest<opc, OpcodeStr#"w", sched.YMM, 5720 v16i16x_info, NAME#"W">, EVEX_V256, VEX_W; 5721 defm WZ128: avx512_vptest<opc, OpcodeStr#"w", sched.XMM, 5722 v8i16x_info, NAME#"W">, EVEX_V128, VEX_W; 5723 defm BZ256: avx512_vptest<opc, OpcodeStr#"b", sched.YMM, 5724 v32i8x_info, NAME#"B">, EVEX_V256; 5725 defm BZ128: avx512_vptest<opc, OpcodeStr#"b", sched.XMM, 5726 v16i8x_info, NAME#"B">, EVEX_V128; 5727 } 5728} 5729 5730multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr, 5731 X86SchedWriteWidths sched> : 5732 avx512_vptest_wb<opc_wb, OpcodeStr, sched>, 5733 avx512_vptest_dq<opc_dq, OpcodeStr, sched>; 5734 5735defm VPTESTM : avx512_vptest_all_forms<0x26, 0x27, "vptestm", 5736 SchedWriteVecLogic>, T8PD; 5737defm VPTESTNM : avx512_vptest_all_forms<0x26, 0x27, "vptestnm", 5738 SchedWriteVecLogic>, T8XS; 5739 5740//===----------------------------------------------------------------------===// 5741// AVX-512 Shift instructions 5742//===----------------------------------------------------------------------===// 5743 5744multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM, 5745 string OpcodeStr, SDNode OpNode, 5746 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 5747 let ExeDomain = _.ExeDomain in { 5748 defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst), 5749 (ins _.RC:$src1, u8imm:$src2), OpcodeStr, 5750 "$src2, $src1", "$src1, $src2", 5751 (_.VT (OpNode _.RC:$src1, (i8 timm:$src2)))>, 5752 Sched<[sched]>; 5753 defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst), 5754 (ins _.MemOp:$src1, u8imm:$src2), OpcodeStr, 5755 "$src2, $src1", "$src1, $src2", 5756 (_.VT (OpNode (_.VT (_.LdFrag addr:$src1)), 5757 (i8 timm:$src2)))>, 5758 Sched<[sched.Folded]>; 5759 } 5760} 5761 5762multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM, 5763 string OpcodeStr, SDNode OpNode, 5764 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 5765 let ExeDomain = _.ExeDomain in 5766 defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst), 5767 (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr, 5768 "$src2, ${src1}"#_.BroadcastStr, "${src1}"#_.BroadcastStr#", $src2", 5769 (_.VT (OpNode (_.BroadcastLdFrag addr:$src1), (i8 timm:$src2)))>, 5770 EVEX_B, Sched<[sched.Folded]>; 5771} 5772 5773multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode, 5774 X86FoldableSchedWrite sched, ValueType SrcVT, 5775 X86VectorVTInfo _> { 5776 // src2 is always 128-bit 5777 let ExeDomain = _.ExeDomain in { 5778 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 5779 (ins _.RC:$src1, VR128X:$src2), OpcodeStr, 5780 "$src2, $src1", "$src1, $src2", 5781 (_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2)))>, 5782 AVX512BIBase, EVEX_4V, Sched<[sched]>; 5783 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 5784 (ins _.RC:$src1, i128mem:$src2), OpcodeStr, 5785 "$src2, $src1", "$src1, $src2", 5786 (_.VT (OpNode _.RC:$src1, (SrcVT (load addr:$src2))))>, 5787 AVX512BIBase, 5788 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>; 5789 } 5790} 5791 5792multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode, 5793 X86SchedWriteWidths sched, ValueType SrcVT, 5794 AVX512VLVectorVTInfo VTInfo, 5795 Predicate prd> { 5796 let Predicates = [prd] in 5797 defm Z : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.ZMM, SrcVT, 5798 VTInfo.info512>, EVEX_V512, 5799 EVEX_CD8<VTInfo.info512.EltSize, CD8VQ> ; 5800 let Predicates = [prd, HasVLX] in { 5801 defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.YMM, SrcVT, 5802 VTInfo.info256>, EVEX_V256, 5803 EVEX_CD8<VTInfo.info256.EltSize, CD8VH>; 5804 defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.XMM, SrcVT, 5805 VTInfo.info128>, EVEX_V128, 5806 EVEX_CD8<VTInfo.info128.EltSize, CD8VF>; 5807 } 5808} 5809 5810multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw, 5811 string OpcodeStr, SDNode OpNode, 5812 X86SchedWriteWidths sched, 5813 bit NotEVEX2VEXConvertibleQ = 0> { 5814 defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, sched, v4i32, 5815 avx512vl_i32_info, HasAVX512>; 5816 let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in 5817 defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, sched, v2i64, 5818 avx512vl_i64_info, HasAVX512>, VEX_W; 5819 defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, sched, v8i16, 5820 avx512vl_i16_info, HasBWI>; 5821} 5822 5823multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM, 5824 string OpcodeStr, SDNode OpNode, 5825 X86SchedWriteWidths sched, 5826 AVX512VLVectorVTInfo VTInfo> { 5827 let Predicates = [HasAVX512] in 5828 defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, 5829 sched.ZMM, VTInfo.info512>, 5830 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.ZMM, 5831 VTInfo.info512>, EVEX_V512; 5832 let Predicates = [HasAVX512, HasVLX] in { 5833 defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, 5834 sched.YMM, VTInfo.info256>, 5835 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.YMM, 5836 VTInfo.info256>, EVEX_V256; 5837 defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, 5838 sched.XMM, VTInfo.info128>, 5839 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.XMM, 5840 VTInfo.info128>, EVEX_V128; 5841 } 5842} 5843 5844multiclass avx512_shift_rmi_w<bits<8> opcw, Format ImmFormR, Format ImmFormM, 5845 string OpcodeStr, SDNode OpNode, 5846 X86SchedWriteWidths sched> { 5847 let Predicates = [HasBWI] in 5848 defm WZ: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode, 5849 sched.ZMM, v32i16_info>, EVEX_V512, VEX_WIG; 5850 let Predicates = [HasVLX, HasBWI] in { 5851 defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode, 5852 sched.YMM, v16i16x_info>, EVEX_V256, VEX_WIG; 5853 defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode, 5854 sched.XMM, v8i16x_info>, EVEX_V128, VEX_WIG; 5855 } 5856} 5857 5858multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq, 5859 Format ImmFormR, Format ImmFormM, 5860 string OpcodeStr, SDNode OpNode, 5861 X86SchedWriteWidths sched, 5862 bit NotEVEX2VEXConvertibleQ = 0> { 5863 defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode, 5864 sched, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; 5865 let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in 5866 defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode, 5867 sched, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W; 5868} 5869 5870defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli, 5871 SchedWriteVecShiftImm>, 5872 avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli, 5873 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V; 5874 5875defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli, 5876 SchedWriteVecShiftImm>, 5877 avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli, 5878 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V; 5879 5880defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai, 5881 SchedWriteVecShiftImm, 1>, 5882 avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai, 5883 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V; 5884 5885defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri, 5886 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V; 5887defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli, 5888 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V; 5889 5890defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl, 5891 SchedWriteVecShift>; 5892defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra, 5893 SchedWriteVecShift, 1>; 5894defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl, 5895 SchedWriteVecShift>; 5896 5897// Use 512bit VPSRA/VPSRAI version to implement v2i64/v4i64 in case NoVLX. 5898let Predicates = [HasAVX512, NoVLX] in { 5899 def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))), 5900 (EXTRACT_SUBREG (v8i64 5901 (VPSRAQZrr 5902 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 5903 VR128X:$src2)), sub_ymm)>; 5904 5905 def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))), 5906 (EXTRACT_SUBREG (v8i64 5907 (VPSRAQZrr 5908 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 5909 VR128X:$src2)), sub_xmm)>; 5910 5911 def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 timm:$src2))), 5912 (EXTRACT_SUBREG (v8i64 5913 (VPSRAQZri 5914 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 5915 timm:$src2)), sub_ymm)>; 5916 5917 def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 timm:$src2))), 5918 (EXTRACT_SUBREG (v8i64 5919 (VPSRAQZri 5920 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 5921 timm:$src2)), sub_xmm)>; 5922} 5923 5924//===-------------------------------------------------------------------===// 5925// Variable Bit Shifts 5926//===-------------------------------------------------------------------===// 5927 5928multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode, 5929 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 5930 let ExeDomain = _.ExeDomain in { 5931 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 5932 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 5933 "$src2, $src1", "$src1, $src2", 5934 (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2)))>, 5935 AVX5128IBase, EVEX_4V, Sched<[sched]>; 5936 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 5937 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr, 5938 "$src2, $src1", "$src1, $src2", 5939 (_.VT (OpNode _.RC:$src1, 5940 (_.VT (_.LdFrag addr:$src2))))>, 5941 AVX5128IBase, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, 5942 Sched<[sched.Folded, sched.ReadAfterFold]>; 5943 } 5944} 5945 5946multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode, 5947 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 5948 let ExeDomain = _.ExeDomain in 5949 defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 5950 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr, 5951 "${src2}"#_.BroadcastStr#", $src1", 5952 "$src1, ${src2}"#_.BroadcastStr, 5953 (_.VT (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))>, 5954 AVX5128IBase, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, 5955 Sched<[sched.Folded, sched.ReadAfterFold]>; 5956} 5957 5958multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode, 5959 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> { 5960 let Predicates = [HasAVX512] in 5961 defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, 5962 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512; 5963 5964 let Predicates = [HasAVX512, HasVLX] in { 5965 defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, 5966 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256; 5967 defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, 5968 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128; 5969 } 5970} 5971 5972multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr, 5973 SDNode OpNode, X86SchedWriteWidths sched> { 5974 defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode, sched, 5975 avx512vl_i32_info>; 5976 defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode, sched, 5977 avx512vl_i64_info>, VEX_W; 5978} 5979 5980// Use 512bit version to implement 128/256 bit in case NoVLX. 5981multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr, 5982 SDNode OpNode, list<Predicate> p> { 5983 let Predicates = p in { 5984 def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1), 5985 (_.info256.VT _.info256.RC:$src2))), 5986 (EXTRACT_SUBREG 5987 (!cast<Instruction>(OpcodeStr#"Zrr") 5988 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm), 5989 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)), 5990 sub_ymm)>; 5991 5992 def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1), 5993 (_.info128.VT _.info128.RC:$src2))), 5994 (EXTRACT_SUBREG 5995 (!cast<Instruction>(OpcodeStr#"Zrr") 5996 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm), 5997 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)), 5998 sub_xmm)>; 5999 } 6000} 6001multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr, 6002 SDNode OpNode, X86SchedWriteWidths sched> { 6003 let Predicates = [HasBWI] in 6004 defm WZ: avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v32i16_info>, 6005 EVEX_V512, VEX_W; 6006 let Predicates = [HasVLX, HasBWI] in { 6007 6008 defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v16i16x_info>, 6009 EVEX_V256, VEX_W; 6010 defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v8i16x_info>, 6011 EVEX_V128, VEX_W; 6012 } 6013} 6014 6015defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", X86vshlv, SchedWriteVarVecShift>, 6016 avx512_var_shift_w<0x12, "vpsllvw", X86vshlv, SchedWriteVarVecShift>; 6017 6018defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", X86vsrav, SchedWriteVarVecShift>, 6019 avx512_var_shift_w<0x11, "vpsravw", X86vsrav, SchedWriteVarVecShift>; 6020 6021defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", X86vsrlv, SchedWriteVarVecShift>, 6022 avx512_var_shift_w<0x10, "vpsrlvw", X86vsrlv, SchedWriteVarVecShift>; 6023 6024defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SchedWriteVarVecShift>; 6025defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SchedWriteVarVecShift>; 6026 6027defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", X86vsrav, [HasAVX512, NoVLX]>; 6028defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", X86vshlv, [HasBWI, NoVLX]>; 6029defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", X86vsrav, [HasBWI, NoVLX]>; 6030defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", X86vsrlv, [HasBWI, NoVLX]>; 6031 6032 6033// Use 512bit VPROL/VPROLI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX. 6034let Predicates = [HasAVX512, NoVLX] in { 6035 def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))), 6036 (EXTRACT_SUBREG (v8i64 6037 (VPROLVQZrr 6038 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6039 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))), 6040 sub_xmm)>; 6041 def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))), 6042 (EXTRACT_SUBREG (v8i64 6043 (VPROLVQZrr 6044 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6045 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))), 6046 sub_ymm)>; 6047 6048 def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))), 6049 (EXTRACT_SUBREG (v16i32 6050 (VPROLVDZrr 6051 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6052 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))), 6053 sub_xmm)>; 6054 def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))), 6055 (EXTRACT_SUBREG (v16i32 6056 (VPROLVDZrr 6057 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6058 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))), 6059 sub_ymm)>; 6060 6061 def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 timm:$src2))), 6062 (EXTRACT_SUBREG (v8i64 6063 (VPROLQZri 6064 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6065 timm:$src2)), sub_xmm)>; 6066 def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 timm:$src2))), 6067 (EXTRACT_SUBREG (v8i64 6068 (VPROLQZri 6069 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6070 timm:$src2)), sub_ymm)>; 6071 6072 def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 timm:$src2))), 6073 (EXTRACT_SUBREG (v16i32 6074 (VPROLDZri 6075 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6076 timm:$src2)), sub_xmm)>; 6077 def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 timm:$src2))), 6078 (EXTRACT_SUBREG (v16i32 6079 (VPROLDZri 6080 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6081 timm:$src2)), sub_ymm)>; 6082} 6083 6084// Use 512bit VPROR/VPRORI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX. 6085let Predicates = [HasAVX512, NoVLX] in { 6086 def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))), 6087 (EXTRACT_SUBREG (v8i64 6088 (VPRORVQZrr 6089 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6090 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))), 6091 sub_xmm)>; 6092 def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))), 6093 (EXTRACT_SUBREG (v8i64 6094 (VPRORVQZrr 6095 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6096 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))), 6097 sub_ymm)>; 6098 6099 def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))), 6100 (EXTRACT_SUBREG (v16i32 6101 (VPRORVDZrr 6102 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6103 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))), 6104 sub_xmm)>; 6105 def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))), 6106 (EXTRACT_SUBREG (v16i32 6107 (VPRORVDZrr 6108 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6109 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))), 6110 sub_ymm)>; 6111 6112 def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 timm:$src2))), 6113 (EXTRACT_SUBREG (v8i64 6114 (VPRORQZri 6115 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6116 timm:$src2)), sub_xmm)>; 6117 def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 timm:$src2))), 6118 (EXTRACT_SUBREG (v8i64 6119 (VPRORQZri 6120 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6121 timm:$src2)), sub_ymm)>; 6122 6123 def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 timm:$src2))), 6124 (EXTRACT_SUBREG (v16i32 6125 (VPRORDZri 6126 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6127 timm:$src2)), sub_xmm)>; 6128 def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 timm:$src2))), 6129 (EXTRACT_SUBREG (v16i32 6130 (VPRORDZri 6131 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6132 timm:$src2)), sub_ymm)>; 6133} 6134 6135//===-------------------------------------------------------------------===// 6136// 1-src variable permutation VPERMW/D/Q 6137//===-------------------------------------------------------------------===// 6138 6139multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode, 6140 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> { 6141 let Predicates = [HasAVX512] in 6142 defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>, 6143 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info512>, EVEX_V512; 6144 6145 let Predicates = [HasAVX512, HasVLX] in 6146 defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>, 6147 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info256>, EVEX_V256; 6148} 6149 6150multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM, 6151 string OpcodeStr, SDNode OpNode, 6152 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo> { 6153 let Predicates = [HasAVX512] in 6154 defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, 6155 sched, VTInfo.info512>, 6156 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, 6157 sched, VTInfo.info512>, EVEX_V512; 6158 let Predicates = [HasAVX512, HasVLX] in 6159 defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, 6160 sched, VTInfo.info256>, 6161 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, 6162 sched, VTInfo.info256>, EVEX_V256; 6163} 6164 6165multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr, 6166 Predicate prd, SDNode OpNode, 6167 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> { 6168 let Predicates = [prd] in 6169 defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>, 6170 EVEX_V512 ; 6171 let Predicates = [HasVLX, prd] in { 6172 defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>, 6173 EVEX_V256 ; 6174 defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info128>, 6175 EVEX_V128 ; 6176 } 6177} 6178 6179defm VPERMW : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv, 6180 WriteVarShuffle256, avx512vl_i16_info>, VEX_W; 6181defm VPERMB : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv, 6182 WriteVarShuffle256, avx512vl_i8_info>; 6183 6184defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv, 6185 WriteVarShuffle256, avx512vl_i32_info>; 6186defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv, 6187 WriteVarShuffle256, avx512vl_i64_info>, VEX_W; 6188defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv, 6189 WriteFVarShuffle256, avx512vl_f32_info>; 6190defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv, 6191 WriteFVarShuffle256, avx512vl_f64_info>, VEX_W; 6192 6193defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq", 6194 X86VPermi, WriteShuffle256, avx512vl_i64_info>, 6195 EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W; 6196defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd", 6197 X86VPermi, WriteFShuffle256, avx512vl_f64_info>, 6198 EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W; 6199 6200//===----------------------------------------------------------------------===// 6201// AVX-512 - VPERMIL 6202//===----------------------------------------------------------------------===// 6203 6204multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode, 6205 X86FoldableSchedWrite sched, X86VectorVTInfo _, 6206 X86VectorVTInfo Ctrl> { 6207 defm rr: AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst), 6208 (ins _.RC:$src1, Ctrl.RC:$src2), OpcodeStr, 6209 "$src2, $src1", "$src1, $src2", 6210 (_.VT (OpNode _.RC:$src1, 6211 (Ctrl.VT Ctrl.RC:$src2)))>, 6212 T8PD, EVEX_4V, Sched<[sched]>; 6213 defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst), 6214 (ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr, 6215 "$src2, $src1", "$src1, $src2", 6216 (_.VT (OpNode 6217 _.RC:$src1, 6218 (Ctrl.VT (Ctrl.LdFrag addr:$src2))))>, 6219 T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, 6220 Sched<[sched.Folded, sched.ReadAfterFold]>; 6221 defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst), 6222 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr, 6223 "${src2}"#_.BroadcastStr#", $src1", 6224 "$src1, ${src2}"#_.BroadcastStr, 6225 (_.VT (OpNode 6226 _.RC:$src1, 6227 (Ctrl.VT (Ctrl.BroadcastLdFrag addr:$src2))))>, 6228 T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, 6229 Sched<[sched.Folded, sched.ReadAfterFold]>; 6230} 6231 6232multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar, 6233 X86SchedWriteWidths sched, 6234 AVX512VLVectorVTInfo _, 6235 AVX512VLVectorVTInfo Ctrl> { 6236 let Predicates = [HasAVX512] in { 6237 defm Z : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.ZMM, 6238 _.info512, Ctrl.info512>, EVEX_V512; 6239 } 6240 let Predicates = [HasAVX512, HasVLX] in { 6241 defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.XMM, 6242 _.info128, Ctrl.info128>, EVEX_V128; 6243 defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.YMM, 6244 _.info256, Ctrl.info256>, EVEX_V256; 6245 } 6246} 6247 6248multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar, 6249 AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{ 6250 defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, SchedWriteFVarShuffle, 6251 _, Ctrl>; 6252 defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr, 6253 X86VPermilpi, SchedWriteFShuffle, _>, 6254 EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>; 6255} 6256 6257let ExeDomain = SSEPackedSingle in 6258defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info, 6259 avx512vl_i32_info>; 6260let ExeDomain = SSEPackedDouble in 6261defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info, 6262 avx512vl_i64_info>, VEX_W1X; 6263 6264//===----------------------------------------------------------------------===// 6265// AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW 6266//===----------------------------------------------------------------------===// 6267 6268defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd", 6269 X86PShufd, SchedWriteShuffle, avx512vl_i32_info>, 6270 EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>; 6271defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw", 6272 X86PShufhw, SchedWriteShuffle>, 6273 EVEX, AVX512XSIi8Base; 6274defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw", 6275 X86PShuflw, SchedWriteShuffle>, 6276 EVEX, AVX512XDIi8Base; 6277 6278//===----------------------------------------------------------------------===// 6279// AVX-512 - VPSHUFB 6280//===----------------------------------------------------------------------===// 6281 6282multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode, 6283 X86SchedWriteWidths sched> { 6284 let Predicates = [HasBWI] in 6285 defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v64i8_info>, 6286 EVEX_V512; 6287 6288 let Predicates = [HasVLX, HasBWI] in { 6289 defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v32i8x_info>, 6290 EVEX_V256; 6291 defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v16i8x_info>, 6292 EVEX_V128; 6293 } 6294} 6295 6296defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb, 6297 SchedWriteVarShuffle>, VEX_WIG; 6298 6299//===----------------------------------------------------------------------===// 6300// Move Low to High and High to Low packed FP Instructions 6301//===----------------------------------------------------------------------===// 6302 6303def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst), 6304 (ins VR128X:$src1, VR128X:$src2), 6305 "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", 6306 [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))]>, 6307 Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V; 6308let isCommutable = 1 in 6309def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst), 6310 (ins VR128X:$src1, VR128X:$src2), 6311 "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}", 6312 [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))]>, 6313 Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V, NotMemoryFoldable; 6314 6315//===----------------------------------------------------------------------===// 6316// VMOVHPS/PD VMOVLPS Instructions 6317// All patterns was taken from SSS implementation. 6318//===----------------------------------------------------------------------===// 6319 6320multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr, 6321 SDPatternOperator OpNode, 6322 X86VectorVTInfo _> { 6323 let hasSideEffects = 0, mayLoad = 1, ExeDomain = _.ExeDomain in 6324 def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst), 6325 (ins _.RC:$src1, f64mem:$src2), 6326 !strconcat(OpcodeStr, 6327 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 6328 [(set _.RC:$dst, 6329 (OpNode _.RC:$src1, 6330 (_.VT (bitconvert 6331 (v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))]>, 6332 Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>, EVEX_4V; 6333} 6334 6335// No patterns for MOVLPS/MOVHPS as the Movlhps node should only be created in 6336// SSE1. And MOVLPS pattern is even more complex. 6337defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", null_frag, 6338 v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS; 6339defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl, 6340 v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W; 6341defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", null_frag, 6342 v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS; 6343defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movsd, 6344 v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W; 6345 6346let Predicates = [HasAVX512] in { 6347 // VMOVHPD patterns 6348 def : Pat<(v2f64 (X86Unpckl VR128X:$src1, (X86vzload64 addr:$src2))), 6349 (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>; 6350 6351 // VMOVLPD patterns 6352 def : Pat<(v2f64 (X86Movsd VR128X:$src1, (X86vzload64 addr:$src2))), 6353 (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>; 6354} 6355 6356let SchedRW = [WriteFStore] in { 6357let mayStore = 1, hasSideEffects = 0 in 6358def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs), 6359 (ins f64mem:$dst, VR128X:$src), 6360 "vmovhps\t{$src, $dst|$dst, $src}", 6361 []>, EVEX, EVEX_CD8<32, CD8VT2>; 6362def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs), 6363 (ins f64mem:$dst, VR128X:$src), 6364 "vmovhpd\t{$src, $dst|$dst, $src}", 6365 [(store (f64 (extractelt 6366 (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)), 6367 (iPTR 0))), addr:$dst)]>, 6368 EVEX, EVEX_CD8<64, CD8VT1>, VEX_W; 6369let mayStore = 1, hasSideEffects = 0 in 6370def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs), 6371 (ins f64mem:$dst, VR128X:$src), 6372 "vmovlps\t{$src, $dst|$dst, $src}", 6373 []>, EVEX, EVEX_CD8<32, CD8VT2>; 6374def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs), 6375 (ins f64mem:$dst, VR128X:$src), 6376 "vmovlpd\t{$src, $dst|$dst, $src}", 6377 [(store (f64 (extractelt (v2f64 VR128X:$src), 6378 (iPTR 0))), addr:$dst)]>, 6379 EVEX, EVEX_CD8<64, CD8VT1>, VEX_W; 6380} // SchedRW 6381 6382let Predicates = [HasAVX512] in { 6383 // VMOVHPD patterns 6384 def : Pat<(store (f64 (extractelt 6385 (v2f64 (X86VPermilpi VR128X:$src, (i8 1))), 6386 (iPTR 0))), addr:$dst), 6387 (VMOVHPDZ128mr addr:$dst, VR128X:$src)>; 6388} 6389//===----------------------------------------------------------------------===// 6390// FMA - Fused Multiply Operations 6391// 6392 6393multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 6394 SDNode MaskOpNode, X86FoldableSchedWrite sched, 6395 X86VectorVTInfo _, string Suff> { 6396 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0, 6397 Uses = [MXCSR], mayRaiseFPException = 1 in { 6398 defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst), 6399 (ins _.RC:$src2, _.RC:$src3), 6400 OpcodeStr, "$src3, $src2", "$src2, $src3", 6401 (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 6402 (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>, 6403 AVX512FMA3Base, Sched<[sched]>; 6404 6405 defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst), 6406 (ins _.RC:$src2, _.MemOp:$src3), 6407 OpcodeStr, "$src3, $src2", "$src2, $src3", 6408 (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 6409 (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>, 6410 AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>; 6411 6412 defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst), 6413 (ins _.RC:$src2, _.ScalarMemOp:$src3), 6414 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), 6415 !strconcat("$src2, ${src3}", _.BroadcastStr ), 6416 (OpNode _.RC:$src2, 6417 _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))), 6418 (MaskOpNode _.RC:$src2, 6419 _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))), 1, 0>, 6420 AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 6421 } 6422} 6423 6424multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode, 6425 X86FoldableSchedWrite sched, 6426 X86VectorVTInfo _, string Suff> { 6427 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0, 6428 Uses = [MXCSR] in 6429 defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst), 6430 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc), 6431 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", 6432 (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))), 6433 (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))), 1, 1>, 6434 AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>; 6435} 6436 6437multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 6438 SDNode MaskOpNode, SDNode OpNodeRnd, 6439 X86SchedWriteWidths sched, 6440 AVX512VLVectorVTInfo _, string Suff> { 6441 let Predicates = [HasAVX512] in { 6442 defm Z : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode, 6443 sched.ZMM, _.info512, Suff>, 6444 avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM, 6445 _.info512, Suff>, 6446 EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>; 6447 } 6448 let Predicates = [HasVLX, HasAVX512] in { 6449 defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode, 6450 sched.YMM, _.info256, Suff>, 6451 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>; 6452 defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode, 6453 sched.XMM, _.info128, Suff>, 6454 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>; 6455 } 6456} 6457 6458multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 6459 SDNode MaskOpNode, SDNode OpNodeRnd> { 6460 defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode, 6461 OpNodeRnd, SchedWriteFMA, 6462 avx512vl_f32_info, "PS">; 6463 defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode, 6464 OpNodeRnd, SchedWriteFMA, 6465 avx512vl_f64_info, "PD">, VEX_W; 6466} 6467 6468defm VFMADD213 : avx512_fma3p_213_f<0xA8, "vfmadd213", any_fma, 6469 fma, X86FmaddRnd>; 6470defm VFMSUB213 : avx512_fma3p_213_f<0xAA, "vfmsub213", X86any_Fmsub, 6471 X86Fmsub, X86FmsubRnd>; 6472defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub, 6473 X86Fmaddsub, X86FmaddsubRnd>; 6474defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd, 6475 X86Fmsubadd, X86FmsubaddRnd>; 6476defm VFNMADD213 : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86any_Fnmadd, 6477 X86Fnmadd, X86FnmaddRnd>; 6478defm VFNMSUB213 : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86any_Fnmsub, 6479 X86Fnmsub, X86FnmsubRnd>; 6480 6481 6482multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 6483 SDNode MaskOpNode, X86FoldableSchedWrite sched, 6484 X86VectorVTInfo _, string Suff> { 6485 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0, 6486 Uses = [MXCSR], mayRaiseFPException = 1 in { 6487 defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst), 6488 (ins _.RC:$src2, _.RC:$src3), 6489 OpcodeStr, "$src3, $src2", "$src2, $src3", 6490 (null_frag), 6491 (_.VT (MaskOpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>, 6492 AVX512FMA3Base, Sched<[sched]>; 6493 6494 defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst), 6495 (ins _.RC:$src2, _.MemOp:$src3), 6496 OpcodeStr, "$src3, $src2", "$src2, $src3", 6497 (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 6498 (_.VT (MaskOpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>, 6499 AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>; 6500 6501 defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst), 6502 (ins _.RC:$src2, _.ScalarMemOp:$src3), 6503 OpcodeStr, "${src3}"#_.BroadcastStr#", $src2", 6504 "$src2, ${src3}"#_.BroadcastStr, 6505 (_.VT (OpNode _.RC:$src2, 6506 (_.VT (_.BroadcastLdFrag addr:$src3)), 6507 _.RC:$src1)), 6508 (_.VT (MaskOpNode _.RC:$src2, 6509 (_.VT (_.BroadcastLdFrag addr:$src3)), 6510 _.RC:$src1)), 1, 0>, AVX512FMA3Base, EVEX_B, 6511 Sched<[sched.Folded, sched.ReadAfterFold]>; 6512 } 6513} 6514 6515multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode, 6516 X86FoldableSchedWrite sched, 6517 X86VectorVTInfo _, string Suff> { 6518 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0, 6519 Uses = [MXCSR] in 6520 defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst), 6521 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc), 6522 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", 6523 (null_frag), 6524 (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc))), 6525 1, 1>, AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>; 6526} 6527 6528multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 6529 SDNode MaskOpNode, SDNode OpNodeRnd, 6530 X86SchedWriteWidths sched, 6531 AVX512VLVectorVTInfo _, string Suff> { 6532 let Predicates = [HasAVX512] in { 6533 defm Z : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode, 6534 sched.ZMM, _.info512, Suff>, 6535 avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM, 6536 _.info512, Suff>, 6537 EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>; 6538 } 6539 let Predicates = [HasVLX, HasAVX512] in { 6540 defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode, 6541 sched.YMM, _.info256, Suff>, 6542 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>; 6543 defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode, 6544 sched.XMM, _.info128, Suff>, 6545 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>; 6546 } 6547} 6548 6549multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 6550 SDNode MaskOpNode, SDNode OpNodeRnd > { 6551 defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode, 6552 OpNodeRnd, SchedWriteFMA, 6553 avx512vl_f32_info, "PS">; 6554 defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode, 6555 OpNodeRnd, SchedWriteFMA, 6556 avx512vl_f64_info, "PD">, VEX_W; 6557} 6558 6559defm VFMADD231 : avx512_fma3p_231_f<0xB8, "vfmadd231", any_fma, 6560 fma, X86FmaddRnd>; 6561defm VFMSUB231 : avx512_fma3p_231_f<0xBA, "vfmsub231", X86any_Fmsub, 6562 X86Fmsub, X86FmsubRnd>; 6563defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub, 6564 X86Fmaddsub, X86FmaddsubRnd>; 6565defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd, 6566 X86Fmsubadd, X86FmsubaddRnd>; 6567defm VFNMADD231 : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86any_Fnmadd, 6568 X86Fnmadd, X86FnmaddRnd>; 6569defm VFNMSUB231 : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86any_Fnmsub, 6570 X86Fnmsub, X86FnmsubRnd>; 6571 6572multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 6573 SDNode MaskOpNode, X86FoldableSchedWrite sched, 6574 X86VectorVTInfo _, string Suff> { 6575 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0, 6576 Uses = [MXCSR], mayRaiseFPException = 1 in { 6577 defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst), 6578 (ins _.RC:$src2, _.RC:$src3), 6579 OpcodeStr, "$src3, $src2", "$src2, $src3", 6580 (null_frag), 6581 (_.VT (MaskOpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1>, 6582 AVX512FMA3Base, Sched<[sched]>; 6583 6584 // Pattern is 312 order so that the load is in a different place from the 6585 // 213 and 231 patterns this helps tablegen's duplicate pattern detection. 6586 defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst), 6587 (ins _.RC:$src2, _.MemOp:$src3), 6588 OpcodeStr, "$src3, $src2", "$src2, $src3", 6589 (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 6590 (_.VT (MaskOpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>, 6591 AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>; 6592 6593 // Pattern is 312 order so that the load is in a different place from the 6594 // 213 and 231 patterns this helps tablegen's duplicate pattern detection. 6595 defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst), 6596 (ins _.RC:$src2, _.ScalarMemOp:$src3), 6597 OpcodeStr, "${src3}"#_.BroadcastStr#", $src2", 6598 "$src2, ${src3}"#_.BroadcastStr, 6599 (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src3)), 6600 _.RC:$src1, _.RC:$src2)), 6601 (_.VT (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src3)), 6602 _.RC:$src1, _.RC:$src2)), 1, 0>, 6603 AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 6604 } 6605} 6606 6607multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode, 6608 X86FoldableSchedWrite sched, 6609 X86VectorVTInfo _, string Suff> { 6610 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0, 6611 Uses = [MXCSR] in 6612 defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst), 6613 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc), 6614 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", 6615 (null_frag), 6616 (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 timm:$rc))), 6617 1, 1>, AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>; 6618} 6619 6620multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 6621 SDNode MaskOpNode, SDNode OpNodeRnd, 6622 X86SchedWriteWidths sched, 6623 AVX512VLVectorVTInfo _, string Suff> { 6624 let Predicates = [HasAVX512] in { 6625 defm Z : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode, 6626 sched.ZMM, _.info512, Suff>, 6627 avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM, 6628 _.info512, Suff>, 6629 EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>; 6630 } 6631 let Predicates = [HasVLX, HasAVX512] in { 6632 defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode, 6633 sched.YMM, _.info256, Suff>, 6634 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>; 6635 defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode, 6636 sched.XMM, _.info128, Suff>, 6637 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>; 6638 } 6639} 6640 6641multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 6642 SDNode MaskOpNode, SDNode OpNodeRnd > { 6643 defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode, 6644 OpNodeRnd, SchedWriteFMA, 6645 avx512vl_f32_info, "PS">; 6646 defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode, 6647 OpNodeRnd, SchedWriteFMA, 6648 avx512vl_f64_info, "PD">, VEX_W; 6649} 6650 6651defm VFMADD132 : avx512_fma3p_132_f<0x98, "vfmadd132", any_fma, 6652 fma, X86FmaddRnd>; 6653defm VFMSUB132 : avx512_fma3p_132_f<0x9A, "vfmsub132", X86any_Fmsub, 6654 X86Fmsub, X86FmsubRnd>; 6655defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub, 6656 X86Fmaddsub, X86FmaddsubRnd>; 6657defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd, 6658 X86Fmsubadd, X86FmsubaddRnd>; 6659defm VFNMADD132 : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86any_Fnmadd, 6660 X86Fnmadd, X86FnmaddRnd>; 6661defm VFNMSUB132 : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86any_Fnmsub, 6662 X86Fnmsub, X86FnmsubRnd>; 6663 6664// Scalar FMA 6665multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, 6666 dag RHS_r, dag RHS_m, dag RHS_b, bit MaskOnlyReg> { 6667let Constraints = "$src1 = $dst", hasSideEffects = 0 in { 6668 defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 6669 (ins _.RC:$src2, _.RC:$src3), OpcodeStr, 6670 "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>, 6671 AVX512FMA3Base, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC; 6672 6673 let mayLoad = 1 in 6674 defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), 6675 (ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr, 6676 "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>, 6677 AVX512FMA3Base, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC; 6678 6679 let Uses = [MXCSR] in 6680 defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 6681 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc), 6682 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (null_frag), 1, 1>, 6683 AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>; 6684 6685 let isCodeGenOnly = 1, isCommutable = 1 in { 6686 def r : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst), 6687 (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3), 6688 !strconcat(OpcodeStr, 6689 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 6690 !if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC; 6691 def m : AVX512FMA3S<opc, MRMSrcMem, (outs _.FRC:$dst), 6692 (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3), 6693 !strconcat(OpcodeStr, 6694 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 6695 [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC; 6696 6697 let Uses = [MXCSR] in 6698 def rb : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst), 6699 (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3, AVX512RC:$rc), 6700 !strconcat(OpcodeStr, 6701 "\t{$rc, $src3, $src2, $dst|$dst, $src2, $src3, $rc}"), 6702 !if(MaskOnlyReg, [], [RHS_b])>, EVEX_B, EVEX_RC, 6703 Sched<[SchedWriteFMA.Scl]>; 6704 }// isCodeGenOnly = 1 6705}// Constraints = "$src1 = $dst" 6706} 6707 6708multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132, 6709 string OpcodeStr, SDPatternOperator OpNode, SDNode OpNodeRnd, 6710 X86VectorVTInfo _, string SUFF> { 6711 let ExeDomain = _.ExeDomain in { 6712 defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix, _, 6713 // Operands for intrinsic are in 123 order to preserve passthu 6714 // semantics. 6715 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1, 6716 _.FRC:$src3))), 6717 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1, 6718 (_.ScalarLdFrag addr:$src3)))), 6719 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src1, 6720 _.FRC:$src3, (i32 timm:$rc)))), 0>; 6721 6722 defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix, _, 6723 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3, 6724 _.FRC:$src1))), 6725 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, 6726 (_.ScalarLdFrag addr:$src3), _.FRC:$src1))), 6727 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src3, 6728 _.FRC:$src1, (i32 timm:$rc)))), 1>; 6729 6730 // One pattern is 312 order so that the load is in a different place from the 6731 // 213 and 231 patterns this helps tablegen's duplicate pattern detection. 6732 defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix, _, 6733 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3, 6734 _.FRC:$src2))), 6735 (set _.FRC:$dst, (_.EltVT (OpNode (_.ScalarLdFrag addr:$src3), 6736 _.FRC:$src1, _.FRC:$src2))), 6737 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src1, _.FRC:$src3, 6738 _.FRC:$src2, (i32 timm:$rc)))), 1>; 6739 } 6740} 6741 6742multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132, 6743 string OpcodeStr, SDPatternOperator OpNode, SDNode OpNodeRnd> { 6744 let Predicates = [HasAVX512] in { 6745 defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode, 6746 OpNodeRnd, f32x_info, "SS">, 6747 EVEX_CD8<32, CD8VT1>, VEX_LIG; 6748 defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode, 6749 OpNodeRnd, f64x_info, "SD">, 6750 EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W; 6751 } 6752} 6753 6754defm VFMADD : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", any_fma, X86FmaddRnd>; 6755defm VFMSUB : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86any_Fmsub, X86FmsubRnd>; 6756defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86any_Fnmadd, X86FnmaddRnd>; 6757defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86any_Fnmsub, X86FnmsubRnd>; 6758 6759multiclass avx512_scalar_fma_patterns<SDPatternOperator Op, SDNode MaskedOp, 6760 SDNode RndOp, string Prefix, 6761 string Suffix, SDNode Move, 6762 X86VectorVTInfo _, PatLeaf ZeroFP> { 6763 let Predicates = [HasAVX512] in { 6764 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 6765 (Op _.FRC:$src2, 6766 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), 6767 _.FRC:$src3))))), 6768 (!cast<I>(Prefix#"213"#Suffix#"Zr_Int") 6769 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), 6770 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>; 6771 6772 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 6773 (Op _.FRC:$src2, _.FRC:$src3, 6774 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))), 6775 (!cast<I>(Prefix#"231"#Suffix#"Zr_Int") 6776 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), 6777 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>; 6778 6779 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 6780 (Op _.FRC:$src2, 6781 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), 6782 (_.ScalarLdFrag addr:$src3)))))), 6783 (!cast<I>(Prefix#"213"#Suffix#"Zm_Int") 6784 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), 6785 addr:$src3)>; 6786 6787 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 6788 (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), 6789 (_.ScalarLdFrag addr:$src3), _.FRC:$src2))))), 6790 (!cast<I>(Prefix#"132"#Suffix#"Zm_Int") 6791 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), 6792 addr:$src3)>; 6793 6794 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 6795 (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3), 6796 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))), 6797 (!cast<I>(Prefix#"231"#Suffix#"Zm_Int") 6798 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), 6799 addr:$src3)>; 6800 6801 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 6802 (X86selects_mask VK1WM:$mask, 6803 (MaskedOp _.FRC:$src2, 6804 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), 6805 _.FRC:$src3), 6806 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))), 6807 (!cast<I>(Prefix#"213"#Suffix#"Zr_Intk") 6808 VR128X:$src1, VK1WM:$mask, 6809 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), 6810 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>; 6811 6812 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 6813 (X86selects_mask VK1WM:$mask, 6814 (MaskedOp _.FRC:$src2, 6815 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), 6816 (_.ScalarLdFrag addr:$src3)), 6817 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))), 6818 (!cast<I>(Prefix#"213"#Suffix#"Zm_Intk") 6819 VR128X:$src1, VK1WM:$mask, 6820 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>; 6821 6822 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 6823 (X86selects_mask VK1WM:$mask, 6824 (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), 6825 (_.ScalarLdFrag addr:$src3), _.FRC:$src2), 6826 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))), 6827 (!cast<I>(Prefix#"132"#Suffix#"Zm_Intk") 6828 VR128X:$src1, VK1WM:$mask, 6829 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>; 6830 6831 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 6832 (X86selects_mask VK1WM:$mask, 6833 (MaskedOp _.FRC:$src2, _.FRC:$src3, 6834 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))), 6835 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))), 6836 (!cast<I>(Prefix#"231"#Suffix#"Zr_Intk") 6837 VR128X:$src1, VK1WM:$mask, 6838 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), 6839 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>; 6840 6841 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 6842 (X86selects_mask VK1WM:$mask, 6843 (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3), 6844 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))), 6845 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))), 6846 (!cast<I>(Prefix#"231"#Suffix#"Zm_Intk") 6847 VR128X:$src1, VK1WM:$mask, 6848 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>; 6849 6850 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 6851 (X86selects_mask VK1WM:$mask, 6852 (MaskedOp _.FRC:$src2, 6853 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), 6854 _.FRC:$src3), 6855 (_.EltVT ZeroFP)))))), 6856 (!cast<I>(Prefix#"213"#Suffix#"Zr_Intkz") 6857 VR128X:$src1, VK1WM:$mask, 6858 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), 6859 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>; 6860 6861 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 6862 (X86selects_mask VK1WM:$mask, 6863 (MaskedOp _.FRC:$src2, _.FRC:$src3, 6864 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))), 6865 (_.EltVT ZeroFP)))))), 6866 (!cast<I>(Prefix#"231"#Suffix#"Zr_Intkz") 6867 VR128X:$src1, VK1WM:$mask, 6868 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), 6869 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>; 6870 6871 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 6872 (X86selects_mask VK1WM:$mask, 6873 (MaskedOp _.FRC:$src2, 6874 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), 6875 (_.ScalarLdFrag addr:$src3)), 6876 (_.EltVT ZeroFP)))))), 6877 (!cast<I>(Prefix#"213"#Suffix#"Zm_Intkz") 6878 VR128X:$src1, VK1WM:$mask, 6879 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>; 6880 6881 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 6882 (X86selects_mask VK1WM:$mask, 6883 (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), 6884 _.FRC:$src2, (_.ScalarLdFrag addr:$src3)), 6885 (_.EltVT ZeroFP)))))), 6886 (!cast<I>(Prefix#"132"#Suffix#"Zm_Intkz") 6887 VR128X:$src1, VK1WM:$mask, 6888 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>; 6889 6890 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 6891 (X86selects_mask VK1WM:$mask, 6892 (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3), 6893 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))), 6894 (_.EltVT ZeroFP)))))), 6895 (!cast<I>(Prefix#"231"#Suffix#"Zm_Intkz") 6896 VR128X:$src1, VK1WM:$mask, 6897 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>; 6898 6899 // Patterns with rounding mode. 6900 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 6901 (RndOp _.FRC:$src2, 6902 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), 6903 _.FRC:$src3, (i32 timm:$rc)))))), 6904 (!cast<I>(Prefix#"213"#Suffix#"Zrb_Int") 6905 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), 6906 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>; 6907 6908 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 6909 (RndOp _.FRC:$src2, _.FRC:$src3, 6910 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), 6911 (i32 timm:$rc)))))), 6912 (!cast<I>(Prefix#"231"#Suffix#"Zrb_Int") 6913 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), 6914 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>; 6915 6916 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 6917 (X86selects_mask VK1WM:$mask, 6918 (RndOp _.FRC:$src2, 6919 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), 6920 _.FRC:$src3, (i32 timm:$rc)), 6921 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))), 6922 (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intk") 6923 VR128X:$src1, VK1WM:$mask, 6924 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), 6925 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>; 6926 6927 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 6928 (X86selects_mask VK1WM:$mask, 6929 (RndOp _.FRC:$src2, _.FRC:$src3, 6930 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), 6931 (i32 timm:$rc)), 6932 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))), 6933 (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intk") 6934 VR128X:$src1, VK1WM:$mask, 6935 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), 6936 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>; 6937 6938 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 6939 (X86selects_mask VK1WM:$mask, 6940 (RndOp _.FRC:$src2, 6941 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), 6942 _.FRC:$src3, (i32 timm:$rc)), 6943 (_.EltVT ZeroFP)))))), 6944 (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intkz") 6945 VR128X:$src1, VK1WM:$mask, 6946 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), 6947 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>; 6948 6949 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 6950 (X86selects_mask VK1WM:$mask, 6951 (RndOp _.FRC:$src2, _.FRC:$src3, 6952 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), 6953 (i32 timm:$rc)), 6954 (_.EltVT ZeroFP)))))), 6955 (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intkz") 6956 VR128X:$src1, VK1WM:$mask, 6957 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), 6958 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>; 6959 } 6960} 6961 6962defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD", 6963 "SS", X86Movss, v4f32x_info, fp32imm0>; 6964defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB", 6965 "SS", X86Movss, v4f32x_info, fp32imm0>; 6966defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD", 6967 "SS", X86Movss, v4f32x_info, fp32imm0>; 6968defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB", 6969 "SS", X86Movss, v4f32x_info, fp32imm0>; 6970 6971defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD", 6972 "SD", X86Movsd, v2f64x_info, fp64imm0>; 6973defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB", 6974 "SD", X86Movsd, v2f64x_info, fp64imm0>; 6975defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD", 6976 "SD", X86Movsd, v2f64x_info, fp64imm0>; 6977defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB", 6978 "SD", X86Movsd, v2f64x_info, fp64imm0>; 6979 6980//===----------------------------------------------------------------------===// 6981// AVX-512 Packed Multiply of Unsigned 52-bit Integers and Add the Low 52-bit IFMA 6982//===----------------------------------------------------------------------===// 6983let Constraints = "$src1 = $dst" in { 6984multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, 6985 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 6986 // NOTE: The SDNode have the multiply operands first with the add last. 6987 // This enables commuted load patterns to be autogenerated by tablegen. 6988 let ExeDomain = _.ExeDomain in { 6989 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), 6990 (ins _.RC:$src2, _.RC:$src3), 6991 OpcodeStr, "$src3, $src2", "$src2, $src3", 6992 (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>, 6993 AVX512FMA3Base, Sched<[sched]>; 6994 6995 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 6996 (ins _.RC:$src2, _.MemOp:$src3), 6997 OpcodeStr, "$src3, $src2", "$src2, $src3", 6998 (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>, 6999 AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>; 7000 7001 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 7002 (ins _.RC:$src2, _.ScalarMemOp:$src3), 7003 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), 7004 !strconcat("$src2, ${src3}", _.BroadcastStr ), 7005 (OpNode _.RC:$src2, 7006 (_.VT (_.BroadcastLdFrag addr:$src3)), 7007 _.RC:$src1)>, 7008 AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 7009 } 7010} 7011} // Constraints = "$src1 = $dst" 7012 7013multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode, 7014 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> { 7015 let Predicates = [HasIFMA] in { 7016 defm Z : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, 7017 EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>; 7018 } 7019 let Predicates = [HasVLX, HasIFMA] in { 7020 defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, 7021 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>; 7022 defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, 7023 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>; 7024 } 7025} 7026 7027defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l, 7028 SchedWriteVecIMul, avx512vl_i64_info>, 7029 VEX_W; 7030defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h, 7031 SchedWriteVecIMul, avx512vl_i64_info>, 7032 VEX_W; 7033 7034//===----------------------------------------------------------------------===// 7035// AVX-512 Scalar convert from sign integer to float/double 7036//===----------------------------------------------------------------------===// 7037 7038multiclass avx512_vcvtsi<bits<8> opc, SDPatternOperator OpNode, X86FoldableSchedWrite sched, 7039 RegisterClass SrcRC, X86VectorVTInfo DstVT, 7040 X86MemOperand x86memop, PatFrag ld_frag, string asm, 7041 string mem, list<Register> _Uses = [MXCSR], 7042 bit _mayRaiseFPException = 1> { 7043let ExeDomain = DstVT.ExeDomain, Uses = _Uses, 7044 mayRaiseFPException = _mayRaiseFPException in { 7045 let hasSideEffects = 0, isCodeGenOnly = 1 in { 7046 def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst), 7047 (ins DstVT.FRC:$src1, SrcRC:$src), 7048 !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>, 7049 EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>; 7050 let mayLoad = 1 in 7051 def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst), 7052 (ins DstVT.FRC:$src1, x86memop:$src), 7053 asm#"{"#mem#"}\t{$src, $src1, $dst|$dst, $src1, $src}", []>, 7054 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>; 7055 } // hasSideEffects = 0 7056 def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), 7057 (ins DstVT.RC:$src1, SrcRC:$src2), 7058 !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 7059 [(set DstVT.RC:$dst, 7060 (OpNode (DstVT.VT DstVT.RC:$src1), SrcRC:$src2))]>, 7061 EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>; 7062 7063 def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), 7064 (ins DstVT.RC:$src1, x86memop:$src2), 7065 asm#"{"#mem#"}\t{$src2, $src1, $dst|$dst, $src1, $src2}", 7066 [(set DstVT.RC:$dst, 7067 (OpNode (DstVT.VT DstVT.RC:$src1), 7068 (ld_frag addr:$src2)))]>, 7069 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>; 7070} 7071 def : InstAlias<"v"#asm#mem#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", 7072 (!cast<Instruction>(NAME#"rr_Int") DstVT.RC:$dst, 7073 DstVT.RC:$src1, SrcRC:$src2), 0, "att">; 7074} 7075 7076multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode, 7077 X86FoldableSchedWrite sched, RegisterClass SrcRC, 7078 X86VectorVTInfo DstVT, string asm, 7079 string mem> { 7080 let ExeDomain = DstVT.ExeDomain, Uses = [MXCSR] in 7081 def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), 7082 (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc), 7083 !strconcat(asm, 7084 "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"), 7085 [(set DstVT.RC:$dst, 7086 (OpNode (DstVT.VT DstVT.RC:$src1), 7087 SrcRC:$src2, 7088 (i32 timm:$rc)))]>, 7089 EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched, ReadDefault, ReadInt2Fpu]>; 7090 def : InstAlias<"v"#asm#mem#"\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}", 7091 (!cast<Instruction>(NAME#"rrb_Int") DstVT.RC:$dst, 7092 DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc), 0, "att">; 7093} 7094 7095multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode, SDNode OpNodeRnd, 7096 X86FoldableSchedWrite sched, 7097 RegisterClass SrcRC, X86VectorVTInfo DstVT, 7098 X86MemOperand x86memop, PatFrag ld_frag, 7099 string asm, string mem> { 7100 defm NAME : avx512_vcvtsi_round<opc, OpNodeRnd, sched, SrcRC, DstVT, asm, mem>, 7101 avx512_vcvtsi<opc, OpNode, sched, SrcRC, DstVT, x86memop, 7102 ld_frag, asm, mem>, VEX_LIG; 7103} 7104 7105let Predicates = [HasAVX512] in { 7106defm VCVTSI2SSZ : avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd, 7107 WriteCvtI2SS, GR32, 7108 v4f32x_info, i32mem, loadi32, "cvtsi2ss", "l">, 7109 XS, EVEX_CD8<32, CD8VT1>; 7110defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd, 7111 WriteCvtI2SS, GR64, 7112 v4f32x_info, i64mem, loadi64, "cvtsi2ss", "q">, 7113 XS, VEX_W, EVEX_CD8<64, CD8VT1>; 7114defm VCVTSI2SDZ : avx512_vcvtsi<0x2A, null_frag, WriteCvtI2SD, GR32, 7115 v2f64x_info, i32mem, loadi32, "cvtsi2sd", "l", [], 0>, 7116 XD, VEX_LIG, EVEX_CD8<32, CD8VT1>; 7117defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd, 7118 WriteCvtI2SD, GR64, 7119 v2f64x_info, i64mem, loadi64, "cvtsi2sd", "q">, 7120 XD, VEX_W, EVEX_CD8<64, CD8VT1>; 7121 7122def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}", 7123 (VCVTSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">; 7124def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}", 7125 (VCVTSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">; 7126 7127def : Pat<(f32 (any_sint_to_fp (loadi32 addr:$src))), 7128 (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>; 7129def : Pat<(f32 (any_sint_to_fp (loadi64 addr:$src))), 7130 (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>; 7131def : Pat<(f64 (any_sint_to_fp (loadi32 addr:$src))), 7132 (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>; 7133def : Pat<(f64 (any_sint_to_fp (loadi64 addr:$src))), 7134 (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>; 7135 7136def : Pat<(f32 (any_sint_to_fp GR32:$src)), 7137 (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>; 7138def : Pat<(f32 (any_sint_to_fp GR64:$src)), 7139 (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>; 7140def : Pat<(f64 (any_sint_to_fp GR32:$src)), 7141 (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>; 7142def : Pat<(f64 (any_sint_to_fp GR64:$src)), 7143 (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>; 7144 7145defm VCVTUSI2SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd, 7146 WriteCvtI2SS, GR32, 7147 v4f32x_info, i32mem, loadi32, 7148 "cvtusi2ss", "l">, XS, EVEX_CD8<32, CD8VT1>; 7149defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd, 7150 WriteCvtI2SS, GR64, 7151 v4f32x_info, i64mem, loadi64, "cvtusi2ss", "q">, 7152 XS, VEX_W, EVEX_CD8<64, CD8VT1>; 7153defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, null_frag, WriteCvtI2SD, GR32, v2f64x_info, 7154 i32mem, loadi32, "cvtusi2sd", "l", [], 0>, 7155 XD, VEX_LIG, EVEX_CD8<32, CD8VT1>; 7156defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd, 7157 WriteCvtI2SD, GR64, 7158 v2f64x_info, i64mem, loadi64, "cvtusi2sd", "q">, 7159 XD, VEX_W, EVEX_CD8<64, CD8VT1>; 7160 7161def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}", 7162 (VCVTUSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">; 7163def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}", 7164 (VCVTUSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">; 7165 7166def : Pat<(f32 (any_uint_to_fp (loadi32 addr:$src))), 7167 (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>; 7168def : Pat<(f32 (any_uint_to_fp (loadi64 addr:$src))), 7169 (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>; 7170def : Pat<(f64 (any_uint_to_fp (loadi32 addr:$src))), 7171 (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>; 7172def : Pat<(f64 (any_uint_to_fp (loadi64 addr:$src))), 7173 (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>; 7174 7175def : Pat<(f32 (any_uint_to_fp GR32:$src)), 7176 (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>; 7177def : Pat<(f32 (any_uint_to_fp GR64:$src)), 7178 (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>; 7179def : Pat<(f64 (any_uint_to_fp GR32:$src)), 7180 (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>; 7181def : Pat<(f64 (any_uint_to_fp GR64:$src)), 7182 (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>; 7183} 7184 7185//===----------------------------------------------------------------------===// 7186// AVX-512 Scalar convert from float/double to integer 7187//===----------------------------------------------------------------------===// 7188 7189multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT, 7190 X86VectorVTInfo DstVT, SDNode OpNode, 7191 SDNode OpNodeRnd, 7192 X86FoldableSchedWrite sched, string asm, 7193 string aliasStr> { 7194 let Predicates = [HasAVX512], ExeDomain = SrcVT.ExeDomain in { 7195 def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src), 7196 !strconcat(asm,"\t{$src, $dst|$dst, $src}"), 7197 [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src)))]>, 7198 EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC; 7199 let Uses = [MXCSR] in 7200 def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc), 7201 !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"), 7202 [(set DstVT.RC:$dst, (OpNodeRnd (SrcVT.VT SrcVT.RC:$src),(i32 timm:$rc)))]>, 7203 EVEX, VEX_LIG, EVEX_B, EVEX_RC, 7204 Sched<[sched]>; 7205 def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src), 7206 !strconcat(asm,"\t{$src, $dst|$dst, $src}"), 7207 [(set DstVT.RC:$dst, (OpNode 7208 (SrcVT.ScalarIntMemFrags addr:$src)))]>, 7209 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; 7210 } // Predicates = [HasAVX512] 7211 7212 def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}", 7213 (!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0, "att">; 7214 def : InstAlias<"v" # asm # aliasStr # "\t{$rc, $src, $dst|$dst, $src, $rc}", 7215 (!cast<Instruction>(NAME # "rrb_Int") DstVT.RC:$dst, SrcVT.RC:$src, AVX512RC:$rc), 0, "att">; 7216 def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}", 7217 (!cast<Instruction>(NAME # "rm_Int") DstVT.RC:$dst, 7218 SrcVT.IntScalarMemOp:$src), 0, "att">; 7219} 7220 7221// Convert float/double to signed/unsigned int 32/64 7222defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info,X86cvts2si, 7223 X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{l}">, 7224 XS, EVEX_CD8<32, CD8VT1>; 7225defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info, X86cvts2si, 7226 X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{q}">, 7227 XS, VEX_W, EVEX_CD8<32, CD8VT1>; 7228defm VCVTSS2USIZ: avx512_cvt_s_int_round<0x79, f32x_info, i32x_info, X86cvts2usi, 7229 X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{l}">, 7230 XS, EVEX_CD8<32, CD8VT1>; 7231defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, f32x_info, i64x_info, X86cvts2usi, 7232 X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{q}">, 7233 XS, VEX_W, EVEX_CD8<32, CD8VT1>; 7234defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info, X86cvts2si, 7235 X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{l}">, 7236 XD, EVEX_CD8<64, CD8VT1>; 7237defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info, X86cvts2si, 7238 X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{q}">, 7239 XD, VEX_W, EVEX_CD8<64, CD8VT1>; 7240defm VCVTSD2USIZ: avx512_cvt_s_int_round<0x79, f64x_info, i32x_info, X86cvts2usi, 7241 X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{l}">, 7242 XD, EVEX_CD8<64, CD8VT1>; 7243defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info, X86cvts2usi, 7244 X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{q}">, 7245 XD, VEX_W, EVEX_CD8<64, CD8VT1>; 7246 7247multiclass avx512_cvt_s<bits<8> opc, string asm, X86VectorVTInfo SrcVT, 7248 X86VectorVTInfo DstVT, SDNode OpNode, 7249 X86FoldableSchedWrite sched, 7250 string aliasStr> { 7251 let Predicates = [HasAVX512], ExeDomain = SrcVT.ExeDomain in { 7252 let isCodeGenOnly = 1 in { 7253 def rr : AVX512<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.FRC:$src), 7254 !strconcat(asm,"\t{$src, $dst|$dst, $src}"), 7255 [(set DstVT.RC:$dst, (OpNode SrcVT.FRC:$src))]>, 7256 EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC; 7257 def rm : AVX512<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.ScalarMemOp:$src), 7258 !strconcat(asm,"\t{$src, $dst|$dst, $src}"), 7259 [(set DstVT.RC:$dst, (OpNode (SrcVT.ScalarLdFrag addr:$src)))]>, 7260 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; 7261 } 7262 } // Predicates = [HasAVX512] 7263} 7264 7265defm VCVTSS2SIZ: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i32x_info, 7266 lrint, WriteCvtSS2I, 7267 "{l}">, XS, EVEX_CD8<32, CD8VT1>; 7268defm VCVTSS2SI64Z: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i64x_info, 7269 llrint, WriteCvtSS2I, 7270 "{q}">, VEX_W, XS, EVEX_CD8<32, CD8VT1>; 7271defm VCVTSD2SIZ: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i32x_info, 7272 lrint, WriteCvtSD2I, 7273 "{l}">, XD, EVEX_CD8<64, CD8VT1>; 7274defm VCVTSD2SI64Z: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i64x_info, 7275 llrint, WriteCvtSD2I, 7276 "{q}">, VEX_W, XD, EVEX_CD8<64, CD8VT1>; 7277 7278let Predicates = [HasAVX512] in { 7279 def : Pat<(i64 (lrint FR32:$src)), (VCVTSS2SI64Zrr FR32:$src)>; 7280 def : Pat<(i64 (lrint (loadf32 addr:$src))), (VCVTSS2SI64Zrm addr:$src)>; 7281 7282 def : Pat<(i64 (lrint FR64:$src)), (VCVTSD2SI64Zrr FR64:$src)>; 7283 def : Pat<(i64 (lrint (loadf64 addr:$src))), (VCVTSD2SI64Zrm addr:$src)>; 7284} 7285 7286// Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang 7287// which produce unnecessary vmovs{s,d} instructions 7288let Predicates = [HasAVX512] in { 7289def : Pat<(v4f32 (X86Movss 7290 (v4f32 VR128X:$dst), 7291 (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR64:$src)))))), 7292 (VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>; 7293 7294def : Pat<(v4f32 (X86Movss 7295 (v4f32 VR128X:$dst), 7296 (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi64 addr:$src))))))), 7297 (VCVTSI642SSZrm_Int VR128X:$dst, addr:$src)>; 7298 7299def : Pat<(v4f32 (X86Movss 7300 (v4f32 VR128X:$dst), 7301 (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR32:$src)))))), 7302 (VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>; 7303 7304def : Pat<(v4f32 (X86Movss 7305 (v4f32 VR128X:$dst), 7306 (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi32 addr:$src))))))), 7307 (VCVTSI2SSZrm_Int VR128X:$dst, addr:$src)>; 7308 7309def : Pat<(v2f64 (X86Movsd 7310 (v2f64 VR128X:$dst), 7311 (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR64:$src)))))), 7312 (VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>; 7313 7314def : Pat<(v2f64 (X86Movsd 7315 (v2f64 VR128X:$dst), 7316 (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi64 addr:$src))))))), 7317 (VCVTSI642SDZrm_Int VR128X:$dst, addr:$src)>; 7318 7319def : Pat<(v2f64 (X86Movsd 7320 (v2f64 VR128X:$dst), 7321 (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR32:$src)))))), 7322 (VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>; 7323 7324def : Pat<(v2f64 (X86Movsd 7325 (v2f64 VR128X:$dst), 7326 (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi32 addr:$src))))))), 7327 (VCVTSI2SDZrm_Int VR128X:$dst, addr:$src)>; 7328 7329def : Pat<(v4f32 (X86Movss 7330 (v4f32 VR128X:$dst), 7331 (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR64:$src)))))), 7332 (VCVTUSI642SSZrr_Int VR128X:$dst, GR64:$src)>; 7333 7334def : Pat<(v4f32 (X86Movss 7335 (v4f32 VR128X:$dst), 7336 (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi64 addr:$src))))))), 7337 (VCVTUSI642SSZrm_Int VR128X:$dst, addr:$src)>; 7338 7339def : Pat<(v4f32 (X86Movss 7340 (v4f32 VR128X:$dst), 7341 (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR32:$src)))))), 7342 (VCVTUSI2SSZrr_Int VR128X:$dst, GR32:$src)>; 7343 7344def : Pat<(v4f32 (X86Movss 7345 (v4f32 VR128X:$dst), 7346 (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi32 addr:$src))))))), 7347 (VCVTUSI2SSZrm_Int VR128X:$dst, addr:$src)>; 7348 7349def : Pat<(v2f64 (X86Movsd 7350 (v2f64 VR128X:$dst), 7351 (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR64:$src)))))), 7352 (VCVTUSI642SDZrr_Int VR128X:$dst, GR64:$src)>; 7353 7354def : Pat<(v2f64 (X86Movsd 7355 (v2f64 VR128X:$dst), 7356 (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi64 addr:$src))))))), 7357 (VCVTUSI642SDZrm_Int VR128X:$dst, addr:$src)>; 7358 7359def : Pat<(v2f64 (X86Movsd 7360 (v2f64 VR128X:$dst), 7361 (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR32:$src)))))), 7362 (VCVTUSI2SDZrr_Int VR128X:$dst, GR32:$src)>; 7363 7364def : Pat<(v2f64 (X86Movsd 7365 (v2f64 VR128X:$dst), 7366 (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi32 addr:$src))))))), 7367 (VCVTUSI2SDZrm_Int VR128X:$dst, addr:$src)>; 7368} // Predicates = [HasAVX512] 7369 7370// Convert float/double to signed/unsigned int 32/64 with truncation 7371multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC, 7372 X86VectorVTInfo _DstRC, SDPatternOperator OpNode, 7373 SDNode OpNodeInt, SDNode OpNodeSAE, 7374 X86FoldableSchedWrite sched, string aliasStr>{ 7375let Predicates = [HasAVX512], ExeDomain = _SrcRC.ExeDomain in { 7376 let isCodeGenOnly = 1 in { 7377 def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src), 7378 !strconcat(asm,"\t{$src, $dst|$dst, $src}"), 7379 [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>, 7380 EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC; 7381 def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src), 7382 !strconcat(asm,"\t{$src, $dst|$dst, $src}"), 7383 [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>, 7384 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; 7385 } 7386 7387 def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src), 7388 !strconcat(asm,"\t{$src, $dst|$dst, $src}"), 7389 [(set _DstRC.RC:$dst, (OpNodeInt (_SrcRC.VT _SrcRC.RC:$src)))]>, 7390 EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC; 7391 let Uses = [MXCSR] in 7392 def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src), 7393 !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"), 7394 [(set _DstRC.RC:$dst, (OpNodeSAE (_SrcRC.VT _SrcRC.RC:$src)))]>, 7395 EVEX, VEX_LIG, EVEX_B, Sched<[sched]>; 7396 def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), 7397 (ins _SrcRC.IntScalarMemOp:$src), 7398 !strconcat(asm,"\t{$src, $dst|$dst, $src}"), 7399 [(set _DstRC.RC:$dst, 7400 (OpNodeInt (_SrcRC.ScalarIntMemFrags addr:$src)))]>, 7401 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; 7402} //HasAVX512 7403 7404 def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}", 7405 (!cast<Instruction>(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">; 7406 def : InstAlias<asm # aliasStr # "\t{{sae}, $src, $dst|$dst, $src, {sae}}", 7407 (!cast<Instruction>(NAME # "rrb_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">; 7408 def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}", 7409 (!cast<Instruction>(NAME # "rm_Int") _DstRC.RC:$dst, 7410 _SrcRC.IntScalarMemOp:$src), 0, "att">; 7411} 7412 7413defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info, 7414 any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I, 7415 "{l}">, XS, EVEX_CD8<32, CD8VT1>; 7416defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info, 7417 any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I, 7418 "{q}">, VEX_W, XS, EVEX_CD8<32, CD8VT1>; 7419defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info, 7420 any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I, 7421 "{l}">, XD, EVEX_CD8<64, CD8VT1>; 7422defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info, 7423 any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I, 7424 "{q}">, VEX_W, XD, EVEX_CD8<64, CD8VT1>; 7425 7426defm VCVTTSS2USIZ: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i32x_info, 7427 any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I, 7428 "{l}">, XS, EVEX_CD8<32, CD8VT1>; 7429defm VCVTTSS2USI64Z: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i64x_info, 7430 any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I, 7431 "{q}">, XS,VEX_W, EVEX_CD8<32, CD8VT1>; 7432defm VCVTTSD2USIZ: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i32x_info, 7433 any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I, 7434 "{l}">, XD, EVEX_CD8<64, CD8VT1>; 7435defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i64x_info, 7436 any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I, 7437 "{q}">, XD, VEX_W, EVEX_CD8<64, CD8VT1>; 7438 7439//===----------------------------------------------------------------------===// 7440// AVX-512 Convert form float to double and back 7441//===----------------------------------------------------------------------===// 7442 7443let Uses = [MXCSR], mayRaiseFPException = 1 in 7444multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, 7445 X86VectorVTInfo _Src, SDNode OpNode, 7446 X86FoldableSchedWrite sched> { 7447 defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 7448 (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr, 7449 "$src2, $src1", "$src1, $src2", 7450 (_.VT (OpNode (_.VT _.RC:$src1), 7451 (_Src.VT _Src.RC:$src2)))>, 7452 EVEX_4V, VEX_LIG, Sched<[sched]>; 7453 defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), 7454 (ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr, 7455 "$src2, $src1", "$src1, $src2", 7456 (_.VT (OpNode (_.VT _.RC:$src1), 7457 (_Src.ScalarIntMemFrags addr:$src2)))>, 7458 EVEX_4V, VEX_LIG, 7459 Sched<[sched.Folded, sched.ReadAfterFold]>; 7460 7461 let isCodeGenOnly = 1, hasSideEffects = 0 in { 7462 def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst), 7463 (ins _.FRC:$src1, _Src.FRC:$src2), 7464 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, 7465 EVEX_4V, VEX_LIG, Sched<[sched]>; 7466 let mayLoad = 1 in 7467 def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst), 7468 (ins _.FRC:$src1, _Src.ScalarMemOp:$src2), 7469 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, 7470 EVEX_4V, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>; 7471 } 7472} 7473 7474// Scalar Conversion with SAE - suppress all exceptions 7475multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, 7476 X86VectorVTInfo _Src, SDNode OpNodeSAE, 7477 X86FoldableSchedWrite sched> { 7478 let Uses = [MXCSR] in 7479 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 7480 (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr, 7481 "{sae}, $src2, $src1", "$src1, $src2, {sae}", 7482 (_.VT (OpNodeSAE (_.VT _.RC:$src1), 7483 (_Src.VT _Src.RC:$src2)))>, 7484 EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>; 7485} 7486 7487// Scalar Conversion with rounding control (RC) 7488multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, 7489 X86VectorVTInfo _Src, SDNode OpNodeRnd, 7490 X86FoldableSchedWrite sched> { 7491 let Uses = [MXCSR] in 7492 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 7493 (ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr, 7494 "$rc, $src2, $src1", "$src1, $src2, $rc", 7495 (_.VT (OpNodeRnd (_.VT _.RC:$src1), 7496 (_Src.VT _Src.RC:$src2), (i32 timm:$rc)))>, 7497 EVEX_4V, VEX_LIG, Sched<[sched]>, 7498 EVEX_B, EVEX_RC; 7499} 7500multiclass avx512_cvt_fp_scalar_sd2ss<bits<8> opc, string OpcodeStr, 7501 SDNode OpNode, SDNode OpNodeRnd, 7502 X86FoldableSchedWrite sched, 7503 X86VectorVTInfo _src, X86VectorVTInfo _dst> { 7504 let Predicates = [HasAVX512], ExeDomain = SSEPackedSingle in { 7505 defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>, 7506 avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src, 7507 OpNodeRnd, sched>, VEX_W, EVEX_CD8<64, CD8VT1>, XD; 7508 } 7509} 7510 7511multiclass avx512_cvt_fp_scalar_ss2sd<bits<8> opc, string OpcodeStr, 7512 SDNode OpNode, SDNode OpNodeSAE, 7513 X86FoldableSchedWrite sched, 7514 X86VectorVTInfo _src, X86VectorVTInfo _dst> { 7515 let Predicates = [HasAVX512], ExeDomain = SSEPackedSingle in { 7516 defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>, 7517 avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeSAE, sched>, 7518 EVEX_CD8<32, CD8VT1>, XS; 7519 } 7520} 7521defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss", X86frounds, 7522 X86froundsRnd, WriteCvtSD2SS, f64x_info, 7523 f32x_info>; 7524defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd", X86fpexts, 7525 X86fpextsSAE, WriteCvtSS2SD, f32x_info, 7526 f64x_info>; 7527 7528def : Pat<(f64 (any_fpextend FR32X:$src)), 7529 (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>, 7530 Requires<[HasAVX512]>; 7531def : Pat<(f64 (any_fpextend (loadf32 addr:$src))), 7532 (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>, 7533 Requires<[HasAVX512, OptForSize]>; 7534 7535def : Pat<(f32 (any_fpround FR64X:$src)), 7536 (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>, 7537 Requires<[HasAVX512]>; 7538 7539def : Pat<(v4f32 (X86Movss 7540 (v4f32 VR128X:$dst), 7541 (v4f32 (scalar_to_vector 7542 (f32 (any_fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))), 7543 (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>, 7544 Requires<[HasAVX512]>; 7545 7546def : Pat<(v2f64 (X86Movsd 7547 (v2f64 VR128X:$dst), 7548 (v2f64 (scalar_to_vector 7549 (f64 (any_fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))), 7550 (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>, 7551 Requires<[HasAVX512]>; 7552 7553//===----------------------------------------------------------------------===// 7554// AVX-512 Vector convert from signed/unsigned integer to float/double 7555// and from float/double to signed/unsigned integer 7556//===----------------------------------------------------------------------===// 7557 7558multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, 7559 X86VectorVTInfo _Src, SDPatternOperator OpNode, SDPatternOperator MaskOpNode, 7560 X86FoldableSchedWrite sched, 7561 string Broadcast = _.BroadcastStr, 7562 string Alias = "", X86MemOperand MemOp = _Src.MemOp, 7563 RegisterClass MaskRC = _.KRCWM, 7564 dag LdDAG = (_.VT (OpNode (_Src.VT (_Src.LdFrag addr:$src)))), 7565 dag MaskLdDAG = (_.VT (MaskOpNode (_Src.VT (_Src.LdFrag addr:$src))))> { 7566let Uses = [MXCSR], mayRaiseFPException = 1 in { 7567 defm rr : AVX512_maskable_cvt<opc, MRMSrcReg, _, (outs _.RC:$dst), 7568 (ins _Src.RC:$src), 7569 (ins _.RC:$src0, MaskRC:$mask, _Src.RC:$src), 7570 (ins MaskRC:$mask, _Src.RC:$src), 7571 OpcodeStr, "$src", "$src", 7572 (_.VT (OpNode (_Src.VT _Src.RC:$src))), 7573 (vselect_mask MaskRC:$mask, 7574 (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))), 7575 _.RC:$src0), 7576 (vselect_mask MaskRC:$mask, 7577 (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))), 7578 _.ImmAllZerosV)>, 7579 EVEX, Sched<[sched]>; 7580 7581 defm rm : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst), 7582 (ins MemOp:$src), 7583 (ins _.RC:$src0, MaskRC:$mask, MemOp:$src), 7584 (ins MaskRC:$mask, MemOp:$src), 7585 OpcodeStr#Alias, "$src", "$src", 7586 LdDAG, 7587 (vselect_mask MaskRC:$mask, MaskLdDAG, _.RC:$src0), 7588 (vselect_mask MaskRC:$mask, MaskLdDAG, _.ImmAllZerosV)>, 7589 EVEX, Sched<[sched.Folded]>; 7590 7591 defm rmb : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst), 7592 (ins _Src.ScalarMemOp:$src), 7593 (ins _.RC:$src0, MaskRC:$mask, _Src.ScalarMemOp:$src), 7594 (ins MaskRC:$mask, _Src.ScalarMemOp:$src), 7595 OpcodeStr, 7596 "${src}"#Broadcast, "${src}"#Broadcast, 7597 (_.VT (OpNode (_Src.VT 7598 (_Src.BroadcastLdFrag addr:$src)) 7599 )), 7600 (vselect_mask MaskRC:$mask, 7601 (_.VT 7602 (MaskOpNode 7603 (_Src.VT 7604 (_Src.BroadcastLdFrag addr:$src)))), 7605 _.RC:$src0), 7606 (vselect_mask MaskRC:$mask, 7607 (_.VT 7608 (MaskOpNode 7609 (_Src.VT 7610 (_Src.BroadcastLdFrag addr:$src)))), 7611 _.ImmAllZerosV)>, 7612 EVEX, EVEX_B, Sched<[sched.Folded]>; 7613 } 7614} 7615// Conversion with SAE - suppress all exceptions 7616multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, 7617 X86VectorVTInfo _Src, SDNode OpNodeSAE, 7618 X86FoldableSchedWrite sched> { 7619 let Uses = [MXCSR] in 7620 defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 7621 (ins _Src.RC:$src), OpcodeStr, 7622 "{sae}, $src", "$src, {sae}", 7623 (_.VT (OpNodeSAE (_Src.VT _Src.RC:$src)))>, 7624 EVEX, EVEX_B, Sched<[sched]>; 7625} 7626 7627// Conversion with rounding control (RC) 7628multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, 7629 X86VectorVTInfo _Src, SDPatternOperator OpNodeRnd, 7630 X86FoldableSchedWrite sched> { 7631 let Uses = [MXCSR] in 7632 defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 7633 (ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr, 7634 "$rc, $src", "$src, $rc", 7635 (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 timm:$rc)))>, 7636 EVEX, EVEX_B, EVEX_RC, Sched<[sched]>; 7637} 7638 7639// Similar to avx512_vcvt_fp, but uses an extload for the memory form. 7640multiclass avx512_vcvt_fpextend<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, 7641 X86VectorVTInfo _Src, SDPatternOperator OpNode, 7642 SDNode MaskOpNode, 7643 X86FoldableSchedWrite sched, 7644 string Broadcast = _.BroadcastStr, 7645 string Alias = "", X86MemOperand MemOp = _Src.MemOp, 7646 RegisterClass MaskRC = _.KRCWM> 7647 : avx512_vcvt_fp<opc, OpcodeStr, _, _Src, OpNode, MaskOpNode, sched, Broadcast, 7648 Alias, MemOp, MaskRC, 7649 (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src)), 7650 (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src))>; 7651 7652// Extend Float to Double 7653multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr, 7654 X86SchedWriteWidths sched> { 7655 let Predicates = [HasAVX512] in { 7656 defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, v8f64_info, v8f32x_info, 7657 any_fpextend, fpextend, sched.ZMM>, 7658 avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f32x_info, 7659 X86vfpextSAE, sched.ZMM>, EVEX_V512; 7660 } 7661 let Predicates = [HasVLX] in { 7662 defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, v2f64x_info, v4f32x_info, 7663 X86any_vfpext, X86vfpext, sched.XMM, "{1to2}", 7664 "", f64mem>, EVEX_V128; 7665 defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v4f32x_info, 7666 any_fpextend, fpextend, sched.YMM>, EVEX_V256; 7667 } 7668} 7669 7670// Truncate Double to Float 7671multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> { 7672 let Predicates = [HasAVX512] in { 7673 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info, 7674 X86any_vfpround, X86vfpround, sched.ZMM>, 7675 avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8f64_info, 7676 X86vfproundRnd, sched.ZMM>, EVEX_V512; 7677 } 7678 let Predicates = [HasVLX] in { 7679 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2f64x_info, 7680 null_frag, null_frag, sched.XMM, "{1to2}", "{x}", 7681 f128mem, VK2WM>, EVEX_V128; 7682 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info, 7683 X86any_vfpround, X86vfpround, 7684 sched.YMM, "{1to4}", "{y}">, EVEX_V256; 7685 } 7686 7687 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}", 7688 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">; 7689 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", 7690 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst, 7691 VK2WM:$mask, VR128X:$src), 0, "att">; 7692 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|" 7693 "$dst {${mask}} {z}, $src}", 7694 (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst, 7695 VK2WM:$mask, VR128X:$src), 0, "att">; 7696 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}", 7697 (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, f64mem:$src), 0, "att">; 7698 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|" 7699 "$dst {${mask}}, ${src}{1to2}}", 7700 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst, 7701 VK2WM:$mask, f64mem:$src), 0, "att">; 7702 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|" 7703 "$dst {${mask}} {z}, ${src}{1to2}}", 7704 (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst, 7705 VK2WM:$mask, f64mem:$src), 0, "att">; 7706 7707 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}", 7708 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">; 7709 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", 7710 (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst, 7711 VK4WM:$mask, VR256X:$src), 0, "att">; 7712 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|" 7713 "$dst {${mask}} {z}, $src}", 7714 (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst, 7715 VK4WM:$mask, VR256X:$src), 0, "att">; 7716 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}", 7717 (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, f64mem:$src), 0, "att">; 7718 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|" 7719 "$dst {${mask}}, ${src}{1to4}}", 7720 (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst, 7721 VK4WM:$mask, f64mem:$src), 0, "att">; 7722 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|" 7723 "$dst {${mask}} {z}, ${src}{1to4}}", 7724 (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst, 7725 VK4WM:$mask, f64mem:$src), 0, "att">; 7726} 7727 7728defm VCVTPD2PS : avx512_cvtpd2ps<0x5A, "vcvtpd2ps", SchedWriteCvtPD2PS>, 7729 VEX_W, PD, EVEX_CD8<64, CD8VF>; 7730defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd", SchedWriteCvtPS2PD>, 7731 PS, EVEX_CD8<32, CD8VH>; 7732 7733let Predicates = [HasVLX] in { 7734 // Special patterns to allow use of X86vmfpround for masking. Instruction 7735 // patterns have been disabled with null_frag. 7736 def : Pat<(X86any_vfpround (v2f64 VR128X:$src)), 7737 (VCVTPD2PSZ128rr VR128X:$src)>; 7738 def : Pat<(X86vmfpround (v2f64 VR128X:$src), (v4f32 VR128X:$src0), 7739 VK2WM:$mask), 7740 (VCVTPD2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>; 7741 def : Pat<(X86vmfpround (v2f64 VR128X:$src), v4f32x_info.ImmAllZerosV, 7742 VK2WM:$mask), 7743 (VCVTPD2PSZ128rrkz VK2WM:$mask, VR128X:$src)>; 7744 7745 def : Pat<(X86any_vfpround (loadv2f64 addr:$src)), 7746 (VCVTPD2PSZ128rm addr:$src)>; 7747 def : Pat<(X86vmfpround (loadv2f64 addr:$src), (v4f32 VR128X:$src0), 7748 VK2WM:$mask), 7749 (VCVTPD2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 7750 def : Pat<(X86vmfpround (loadv2f64 addr:$src), v4f32x_info.ImmAllZerosV, 7751 VK2WM:$mask), 7752 (VCVTPD2PSZ128rmkz VK2WM:$mask, addr:$src)>; 7753 7754 def : Pat<(X86any_vfpround (v2f64 (X86VBroadcastld64 addr:$src))), 7755 (VCVTPD2PSZ128rmb addr:$src)>; 7756 def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)), 7757 (v4f32 VR128X:$src0), VK2WM:$mask), 7758 (VCVTPD2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>; 7759 def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)), 7760 v4f32x_info.ImmAllZerosV, VK2WM:$mask), 7761 (VCVTPD2PSZ128rmbkz VK2WM:$mask, addr:$src)>; 7762} 7763 7764// Convert Signed/Unsigned Doubleword to Double 7765let Uses = []<Register>, mayRaiseFPException = 0 in 7766multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 7767 SDNode MaskOpNode, SDPatternOperator OpNode128, 7768 SDNode MaskOpNode128, 7769 X86SchedWriteWidths sched> { 7770 // No rounding in this op 7771 let Predicates = [HasAVX512] in 7772 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode, 7773 MaskOpNode, sched.ZMM>, EVEX_V512; 7774 7775 let Predicates = [HasVLX] in { 7776 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info, 7777 OpNode128, MaskOpNode128, sched.XMM, "{1to2}", 7778 "", i64mem, VK2WM, 7779 (v2f64 (OpNode128 (bc_v4i32 7780 (v2i64 7781 (scalar_to_vector (loadi64 addr:$src)))))), 7782 (v2f64 (MaskOpNode128 (bc_v4i32 7783 (v2i64 7784 (scalar_to_vector (loadi64 addr:$src))))))>, 7785 EVEX_V128; 7786 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode, 7787 MaskOpNode, sched.YMM>, EVEX_V256; 7788 } 7789} 7790 7791// Convert Signed/Unsigned Doubleword to Float 7792multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 7793 SDNode MaskOpNode, SDNode OpNodeRnd, 7794 X86SchedWriteWidths sched> { 7795 let Predicates = [HasAVX512] in 7796 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode, 7797 MaskOpNode, sched.ZMM>, 7798 avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info, 7799 OpNodeRnd, sched.ZMM>, EVEX_V512; 7800 7801 let Predicates = [HasVLX] in { 7802 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode, 7803 MaskOpNode, sched.XMM>, EVEX_V128; 7804 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode, 7805 MaskOpNode, sched.YMM>, EVEX_V256; 7806 } 7807} 7808 7809// Convert Float to Signed/Unsigned Doubleword with truncation 7810multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 7811 SDNode MaskOpNode, 7812 SDNode OpNodeSAE, X86SchedWriteWidths sched> { 7813 let Predicates = [HasAVX512] in { 7814 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode, 7815 MaskOpNode, sched.ZMM>, 7816 avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info, 7817 OpNodeSAE, sched.ZMM>, EVEX_V512; 7818 } 7819 let Predicates = [HasVLX] in { 7820 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode, 7821 MaskOpNode, sched.XMM>, EVEX_V128; 7822 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode, 7823 MaskOpNode, sched.YMM>, EVEX_V256; 7824 } 7825} 7826 7827// Convert Float to Signed/Unsigned Doubleword 7828multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode, 7829 SDNode MaskOpNode, SDNode OpNodeRnd, 7830 X86SchedWriteWidths sched> { 7831 let Predicates = [HasAVX512] in { 7832 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode, 7833 MaskOpNode, sched.ZMM>, 7834 avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info, 7835 OpNodeRnd, sched.ZMM>, EVEX_V512; 7836 } 7837 let Predicates = [HasVLX] in { 7838 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode, 7839 MaskOpNode, sched.XMM>, EVEX_V128; 7840 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode, 7841 MaskOpNode, sched.YMM>, EVEX_V256; 7842 } 7843} 7844 7845// Convert Double to Signed/Unsigned Doubleword with truncation 7846multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 7847 SDNode MaskOpNode, SDNode OpNodeSAE, 7848 X86SchedWriteWidths sched> { 7849 let Predicates = [HasAVX512] in { 7850 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode, 7851 MaskOpNode, sched.ZMM>, 7852 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info, 7853 OpNodeSAE, sched.ZMM>, EVEX_V512; 7854 } 7855 let Predicates = [HasVLX] in { 7856 // we need "x"/"y" suffixes in order to distinguish between 128 and 256 7857 // memory forms of these instructions in Asm Parser. They have the same 7858 // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly 7859 // due to the same reason. 7860 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info, 7861 null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem, 7862 VK2WM>, EVEX_V128; 7863 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode, 7864 MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256; 7865 } 7866 7867 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}", 7868 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, 7869 VR128X:$src), 0, "att">; 7870 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", 7871 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst, 7872 VK2WM:$mask, VR128X:$src), 0, "att">; 7873 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}", 7874 (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst, 7875 VK2WM:$mask, VR128X:$src), 0, "att">; 7876 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}", 7877 (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, 7878 f64mem:$src), 0, "att">; 7879 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|" 7880 "$dst {${mask}}, ${src}{1to2}}", 7881 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst, 7882 VK2WM:$mask, f64mem:$src), 0, "att">; 7883 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|" 7884 "$dst {${mask}} {z}, ${src}{1to2}}", 7885 (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst, 7886 VK2WM:$mask, f64mem:$src), 0, "att">; 7887 7888 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}", 7889 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, 7890 VR256X:$src), 0, "att">; 7891 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", 7892 (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst, 7893 VK4WM:$mask, VR256X:$src), 0, "att">; 7894 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}", 7895 (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst, 7896 VK4WM:$mask, VR256X:$src), 0, "att">; 7897 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}", 7898 (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, 7899 f64mem:$src), 0, "att">; 7900 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|" 7901 "$dst {${mask}}, ${src}{1to4}}", 7902 (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst, 7903 VK4WM:$mask, f64mem:$src), 0, "att">; 7904 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|" 7905 "$dst {${mask}} {z}, ${src}{1to4}}", 7906 (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst, 7907 VK4WM:$mask, f64mem:$src), 0, "att">; 7908} 7909 7910// Convert Double to Signed/Unsigned Doubleword 7911multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode, 7912 SDNode MaskOpNode, SDNode OpNodeRnd, 7913 X86SchedWriteWidths sched> { 7914 let Predicates = [HasAVX512] in { 7915 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode, 7916 MaskOpNode, sched.ZMM>, 7917 avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info, 7918 OpNodeRnd, sched.ZMM>, EVEX_V512; 7919 } 7920 let Predicates = [HasVLX] in { 7921 // we need "x"/"y" suffixes in order to distinguish between 128 and 256 7922 // memory forms of these instructions in Asm Parcer. They have the same 7923 // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly 7924 // due to the same reason. 7925 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info, 7926 null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem, 7927 VK2WM>, EVEX_V128; 7928 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode, 7929 MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256; 7930 } 7931 7932 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}", 7933 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">; 7934 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", 7935 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst, 7936 VK2WM:$mask, VR128X:$src), 0, "att">; 7937 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}", 7938 (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst, 7939 VK2WM:$mask, VR128X:$src), 0, "att">; 7940 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}", 7941 (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, 7942 f64mem:$src), 0, "att">; 7943 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|" 7944 "$dst {${mask}}, ${src}{1to2}}", 7945 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst, 7946 VK2WM:$mask, f64mem:$src), 0, "att">; 7947 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|" 7948 "$dst {${mask}} {z}, ${src}{1to2}}", 7949 (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst, 7950 VK2WM:$mask, f64mem:$src), 0, "att">; 7951 7952 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}", 7953 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">; 7954 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", 7955 (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst, 7956 VK4WM:$mask, VR256X:$src), 0, "att">; 7957 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}", 7958 (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst, 7959 VK4WM:$mask, VR256X:$src), 0, "att">; 7960 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}", 7961 (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, 7962 f64mem:$src), 0, "att">; 7963 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|" 7964 "$dst {${mask}}, ${src}{1to4}}", 7965 (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst, 7966 VK4WM:$mask, f64mem:$src), 0, "att">; 7967 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|" 7968 "$dst {${mask}} {z}, ${src}{1to4}}", 7969 (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst, 7970 VK4WM:$mask, f64mem:$src), 0, "att">; 7971} 7972 7973// Convert Double to Signed/Unsigned Quardword 7974multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode, 7975 SDNode MaskOpNode, SDNode OpNodeRnd, 7976 X86SchedWriteWidths sched> { 7977 let Predicates = [HasDQI] in { 7978 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode, 7979 MaskOpNode, sched.ZMM>, 7980 avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info, 7981 OpNodeRnd, sched.ZMM>, EVEX_V512; 7982 } 7983 let Predicates = [HasDQI, HasVLX] in { 7984 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode, 7985 MaskOpNode, sched.XMM>, EVEX_V128; 7986 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode, 7987 MaskOpNode, sched.YMM>, EVEX_V256; 7988 } 7989} 7990 7991// Convert Double to Signed/Unsigned Quardword with truncation 7992multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 7993 SDNode MaskOpNode, SDNode OpNodeRnd, 7994 X86SchedWriteWidths sched> { 7995 let Predicates = [HasDQI] in { 7996 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode, 7997 MaskOpNode, sched.ZMM>, 7998 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info, 7999 OpNodeRnd, sched.ZMM>, EVEX_V512; 8000 } 8001 let Predicates = [HasDQI, HasVLX] in { 8002 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode, 8003 MaskOpNode, sched.XMM>, EVEX_V128; 8004 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode, 8005 MaskOpNode, sched.YMM>, EVEX_V256; 8006 } 8007} 8008 8009// Convert Signed/Unsigned Quardword to Double 8010multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 8011 SDNode MaskOpNode, SDNode OpNodeRnd, 8012 X86SchedWriteWidths sched> { 8013 let Predicates = [HasDQI] in { 8014 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode, 8015 MaskOpNode, sched.ZMM>, 8016 avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info, 8017 OpNodeRnd, sched.ZMM>, EVEX_V512; 8018 } 8019 let Predicates = [HasDQI, HasVLX] in { 8020 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode, 8021 MaskOpNode, sched.XMM>, EVEX_V128, NotEVEX2VEXConvertible; 8022 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode, 8023 MaskOpNode, sched.YMM>, EVEX_V256, NotEVEX2VEXConvertible; 8024 } 8025} 8026 8027// Convert Float to Signed/Unsigned Quardword 8028multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode, 8029 SDNode MaskOpNode, SDNode OpNodeRnd, 8030 X86SchedWriteWidths sched> { 8031 let Predicates = [HasDQI] in { 8032 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode, 8033 MaskOpNode, sched.ZMM>, 8034 avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info, 8035 OpNodeRnd, sched.ZMM>, EVEX_V512; 8036 } 8037 let Predicates = [HasDQI, HasVLX] in { 8038 // Explicitly specified broadcast string, since we take only 2 elements 8039 // from v4f32x_info source 8040 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode, 8041 MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM, 8042 (v2i64 (OpNode (bc_v4f32 8043 (v2f64 8044 (scalar_to_vector (loadf64 addr:$src)))))), 8045 (v2i64 (MaskOpNode (bc_v4f32 8046 (v2f64 8047 (scalar_to_vector (loadf64 addr:$src))))))>, 8048 EVEX_V128; 8049 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode, 8050 MaskOpNode, sched.YMM>, EVEX_V256; 8051 } 8052} 8053 8054// Convert Float to Signed/Unsigned Quardword with truncation 8055multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 8056 SDNode MaskOpNode, SDNode OpNodeRnd, 8057 X86SchedWriteWidths sched> { 8058 let Predicates = [HasDQI] in { 8059 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode, 8060 MaskOpNode, sched.ZMM>, 8061 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info, 8062 OpNodeRnd, sched.ZMM>, EVEX_V512; 8063 } 8064 let Predicates = [HasDQI, HasVLX] in { 8065 // Explicitly specified broadcast string, since we take only 2 elements 8066 // from v4f32x_info source 8067 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode, 8068 MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM, 8069 (v2i64 (OpNode (bc_v4f32 8070 (v2f64 8071 (scalar_to_vector (loadf64 addr:$src)))))), 8072 (v2i64 (MaskOpNode (bc_v4f32 8073 (v2f64 8074 (scalar_to_vector (loadf64 addr:$src))))))>, 8075 EVEX_V128; 8076 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode, 8077 MaskOpNode, sched.YMM>, EVEX_V256; 8078 } 8079} 8080 8081// Convert Signed/Unsigned Quardword to Float 8082multiclass avx512_cvtqq2ps<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 8083 SDNode MaskOpNode, SDNode OpNodeRnd, 8084 X86SchedWriteWidths sched> { 8085 let Predicates = [HasDQI] in { 8086 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i64_info, OpNode, 8087 MaskOpNode, sched.ZMM>, 8088 avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8i64_info, 8089 OpNodeRnd, sched.ZMM>, EVEX_V512; 8090 } 8091 let Predicates = [HasDQI, HasVLX] in { 8092 // we need "x"/"y" suffixes in order to distinguish between 128 and 256 8093 // memory forms of these instructions in Asm Parcer. They have the same 8094 // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly 8095 // due to the same reason. 8096 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2i64x_info, null_frag, 8097 null_frag, sched.XMM, "{1to2}", "{x}", i128mem, VK2WM>, 8098 EVEX_V128, NotEVEX2VEXConvertible; 8099 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i64x_info, OpNode, 8100 MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256, 8101 NotEVEX2VEXConvertible; 8102 } 8103 8104 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}", 8105 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, 8106 VR128X:$src), 0, "att">; 8107 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", 8108 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst, 8109 VK2WM:$mask, VR128X:$src), 0, "att">; 8110 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}", 8111 (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst, 8112 VK2WM:$mask, VR128X:$src), 0, "att">; 8113 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}", 8114 (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, 8115 i64mem:$src), 0, "att">; 8116 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|" 8117 "$dst {${mask}}, ${src}{1to2}}", 8118 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst, 8119 VK2WM:$mask, i64mem:$src), 0, "att">; 8120 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|" 8121 "$dst {${mask}} {z}, ${src}{1to2}}", 8122 (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst, 8123 VK2WM:$mask, i64mem:$src), 0, "att">; 8124 8125 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}", 8126 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, 8127 VR256X:$src), 0, "att">; 8128 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|" 8129 "$dst {${mask}}, $src}", 8130 (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst, 8131 VK4WM:$mask, VR256X:$src), 0, "att">; 8132 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|" 8133 "$dst {${mask}} {z}, $src}", 8134 (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst, 8135 VK4WM:$mask, VR256X:$src), 0, "att">; 8136 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}", 8137 (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, 8138 i64mem:$src), 0, "att">; 8139 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|" 8140 "$dst {${mask}}, ${src}{1to4}}", 8141 (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst, 8142 VK4WM:$mask, i64mem:$src), 0, "att">; 8143 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|" 8144 "$dst {${mask}} {z}, ${src}{1to4}}", 8145 (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst, 8146 VK4WM:$mask, i64mem:$src), 0, "att">; 8147} 8148 8149defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", any_sint_to_fp, sint_to_fp, 8150 X86any_VSintToFP, X86VSintToFP, 8151 SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>; 8152 8153defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", any_sint_to_fp, sint_to_fp, 8154 X86VSintToFpRnd, SchedWriteCvtDQ2PS>, 8155 PS, EVEX_CD8<32, CD8VF>; 8156 8157defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86any_cvttp2si, 8158 X86cvttp2si, X86cvttp2siSAE, 8159 SchedWriteCvtPS2DQ>, XS, EVEX_CD8<32, CD8VF>; 8160 8161defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86any_cvttp2si, 8162 X86cvttp2si, X86cvttp2siSAE, 8163 SchedWriteCvtPD2DQ>, 8164 PD, VEX_W, EVEX_CD8<64, CD8VF>; 8165 8166defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86any_cvttp2ui, 8167 X86cvttp2ui, X86cvttp2uiSAE, 8168 SchedWriteCvtPS2DQ>, PS, EVEX_CD8<32, CD8VF>; 8169 8170defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86any_cvttp2ui, 8171 X86cvttp2ui, X86cvttp2uiSAE, 8172 SchedWriteCvtPD2DQ>, 8173 PS, VEX_W, EVEX_CD8<64, CD8VF>; 8174 8175defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", any_uint_to_fp, 8176 uint_to_fp, X86any_VUintToFP, X86VUintToFP, 8177 SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>; 8178 8179defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", any_uint_to_fp, 8180 uint_to_fp, X86VUintToFpRnd, 8181 SchedWriteCvtDQ2PS>, XD, EVEX_CD8<32, CD8VF>; 8182 8183defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int, X86cvtp2Int, 8184 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD, 8185 EVEX_CD8<32, CD8VF>; 8186 8187defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int, X86cvtp2Int, 8188 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, XD, 8189 VEX_W, EVEX_CD8<64, CD8VF>; 8190 8191defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt, X86cvtp2UInt, 8192 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, 8193 PS, EVEX_CD8<32, CD8VF>; 8194 8195defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt, X86cvtp2UInt, 8196 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W, 8197 PS, EVEX_CD8<64, CD8VF>; 8198 8199defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int, X86cvtp2Int, 8200 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, VEX_W, 8201 PD, EVEX_CD8<64, CD8VF>; 8202 8203defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int, X86cvtp2Int, 8204 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD, 8205 EVEX_CD8<32, CD8VH>; 8206 8207defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt, X86cvtp2UInt, 8208 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W, 8209 PD, EVEX_CD8<64, CD8VF>; 8210 8211defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt, X86cvtp2UInt, 8212 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, PD, 8213 EVEX_CD8<32, CD8VH>; 8214 8215defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86any_cvttp2si, 8216 X86cvttp2si, X86cvttp2siSAE, 8217 SchedWriteCvtPD2DQ>, VEX_W, 8218 PD, EVEX_CD8<64, CD8VF>; 8219 8220defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86any_cvttp2si, 8221 X86cvttp2si, X86cvttp2siSAE, 8222 SchedWriteCvtPS2DQ>, PD, 8223 EVEX_CD8<32, CD8VH>; 8224 8225defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86any_cvttp2ui, 8226 X86cvttp2ui, X86cvttp2uiSAE, 8227 SchedWriteCvtPD2DQ>, VEX_W, 8228 PD, EVEX_CD8<64, CD8VF>; 8229 8230defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86any_cvttp2ui, 8231 X86cvttp2ui, X86cvttp2uiSAE, 8232 SchedWriteCvtPS2DQ>, PD, 8233 EVEX_CD8<32, CD8VH>; 8234 8235defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", any_sint_to_fp, 8236 sint_to_fp, X86VSintToFpRnd, 8237 SchedWriteCvtDQ2PD>, VEX_W, XS, EVEX_CD8<64, CD8VF>; 8238 8239defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", any_uint_to_fp, 8240 uint_to_fp, X86VUintToFpRnd, SchedWriteCvtDQ2PD>, 8241 VEX_W, XS, EVEX_CD8<64, CD8VF>; 8242 8243defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", any_sint_to_fp, 8244 sint_to_fp, X86VSintToFpRnd, SchedWriteCvtDQ2PS>, 8245 VEX_W, PS, EVEX_CD8<64, CD8VF>; 8246 8247defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", any_uint_to_fp, 8248 uint_to_fp, X86VUintToFpRnd, SchedWriteCvtDQ2PS>, 8249 VEX_W, XD, EVEX_CD8<64, CD8VF>; 8250 8251let Predicates = [HasVLX] in { 8252 // Special patterns to allow use of X86mcvtp2Int for masking. Instruction 8253 // patterns have been disabled with null_frag. 8254 def : Pat<(v4i32 (X86cvtp2Int (v2f64 VR128X:$src))), 8255 (VCVTPD2DQZ128rr VR128X:$src)>; 8256 def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), (v4i32 VR128X:$src0), 8257 VK2WM:$mask), 8258 (VCVTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>; 8259 def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV, 8260 VK2WM:$mask), 8261 (VCVTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>; 8262 8263 def : Pat<(v4i32 (X86cvtp2Int (loadv2f64 addr:$src))), 8264 (VCVTPD2DQZ128rm addr:$src)>; 8265 def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), (v4i32 VR128X:$src0), 8266 VK2WM:$mask), 8267 (VCVTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8268 def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV, 8269 VK2WM:$mask), 8270 (VCVTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>; 8271 8272 def : Pat<(v4i32 (X86cvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)))), 8273 (VCVTPD2DQZ128rmb addr:$src)>; 8274 def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)), 8275 (v4i32 VR128X:$src0), VK2WM:$mask), 8276 (VCVTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8277 def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)), 8278 v4i32x_info.ImmAllZerosV, VK2WM:$mask), 8279 (VCVTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>; 8280 8281 // Special patterns to allow use of X86mcvttp2si for masking. Instruction 8282 // patterns have been disabled with null_frag. 8283 def : Pat<(v4i32 (X86any_cvttp2si (v2f64 VR128X:$src))), 8284 (VCVTTPD2DQZ128rr VR128X:$src)>; 8285 def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), (v4i32 VR128X:$src0), 8286 VK2WM:$mask), 8287 (VCVTTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>; 8288 def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV, 8289 VK2WM:$mask), 8290 (VCVTTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>; 8291 8292 def : Pat<(v4i32 (X86any_cvttp2si (loadv2f64 addr:$src))), 8293 (VCVTTPD2DQZ128rm addr:$src)>; 8294 def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), (v4i32 VR128X:$src0), 8295 VK2WM:$mask), 8296 (VCVTTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8297 def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV, 8298 VK2WM:$mask), 8299 (VCVTTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>; 8300 8301 def : Pat<(v4i32 (X86any_cvttp2si (v2f64 (X86VBroadcastld64 addr:$src)))), 8302 (VCVTTPD2DQZ128rmb addr:$src)>; 8303 def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)), 8304 (v4i32 VR128X:$src0), VK2WM:$mask), 8305 (VCVTTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8306 def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)), 8307 v4i32x_info.ImmAllZerosV, VK2WM:$mask), 8308 (VCVTTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>; 8309 8310 // Special patterns to allow use of X86mcvtp2UInt for masking. Instruction 8311 // patterns have been disabled with null_frag. 8312 def : Pat<(v4i32 (X86cvtp2UInt (v2f64 VR128X:$src))), 8313 (VCVTPD2UDQZ128rr VR128X:$src)>; 8314 def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), (v4i32 VR128X:$src0), 8315 VK2WM:$mask), 8316 (VCVTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>; 8317 def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV, 8318 VK2WM:$mask), 8319 (VCVTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>; 8320 8321 def : Pat<(v4i32 (X86cvtp2UInt (loadv2f64 addr:$src))), 8322 (VCVTPD2UDQZ128rm addr:$src)>; 8323 def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), (v4i32 VR128X:$src0), 8324 VK2WM:$mask), 8325 (VCVTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8326 def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV, 8327 VK2WM:$mask), 8328 (VCVTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>; 8329 8330 def : Pat<(v4i32 (X86cvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)))), 8331 (VCVTPD2UDQZ128rmb addr:$src)>; 8332 def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)), 8333 (v4i32 VR128X:$src0), VK2WM:$mask), 8334 (VCVTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8335 def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)), 8336 v4i32x_info.ImmAllZerosV, VK2WM:$mask), 8337 (VCVTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>; 8338 8339 // Special patterns to allow use of X86mcvtp2UInt for masking. Instruction 8340 // patterns have been disabled with null_frag. 8341 def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 VR128X:$src))), 8342 (VCVTTPD2UDQZ128rr VR128X:$src)>; 8343 def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), (v4i32 VR128X:$src0), 8344 VK2WM:$mask), 8345 (VCVTTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>; 8346 def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV, 8347 VK2WM:$mask), 8348 (VCVTTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>; 8349 8350 def : Pat<(v4i32 (X86any_cvttp2ui (loadv2f64 addr:$src))), 8351 (VCVTTPD2UDQZ128rm addr:$src)>; 8352 def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), (v4i32 VR128X:$src0), 8353 VK2WM:$mask), 8354 (VCVTTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8355 def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV, 8356 VK2WM:$mask), 8357 (VCVTTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>; 8358 8359 def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)))), 8360 (VCVTTPD2UDQZ128rmb addr:$src)>; 8361 def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)), 8362 (v4i32 VR128X:$src0), VK2WM:$mask), 8363 (VCVTTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8364 def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)), 8365 v4i32x_info.ImmAllZerosV, VK2WM:$mask), 8366 (VCVTTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>; 8367} 8368 8369let Predicates = [HasDQI, HasVLX] in { 8370 def : Pat<(v2i64 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))), 8371 (VCVTPS2QQZ128rm addr:$src)>; 8372 def : Pat<(v2i64 (vselect_mask VK2WM:$mask, 8373 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), 8374 VR128X:$src0)), 8375 (VCVTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8376 def : Pat<(v2i64 (vselect_mask VK2WM:$mask, 8377 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), 8378 v2i64x_info.ImmAllZerosV)), 8379 (VCVTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>; 8380 8381 def : Pat<(v2i64 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))), 8382 (VCVTPS2UQQZ128rm addr:$src)>; 8383 def : Pat<(v2i64 (vselect_mask VK2WM:$mask, 8384 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), 8385 VR128X:$src0)), 8386 (VCVTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8387 def : Pat<(v2i64 (vselect_mask VK2WM:$mask, 8388 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), 8389 v2i64x_info.ImmAllZerosV)), 8390 (VCVTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>; 8391 8392 def : Pat<(v2i64 (X86any_cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))), 8393 (VCVTTPS2QQZ128rm addr:$src)>; 8394 def : Pat<(v2i64 (vselect_mask VK2WM:$mask, 8395 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), 8396 VR128X:$src0)), 8397 (VCVTTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8398 def : Pat<(v2i64 (vselect_mask VK2WM:$mask, 8399 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), 8400 v2i64x_info.ImmAllZerosV)), 8401 (VCVTTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>; 8402 8403 def : Pat<(v2i64 (X86any_cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))), 8404 (VCVTTPS2UQQZ128rm addr:$src)>; 8405 def : Pat<(v2i64 (vselect_mask VK2WM:$mask, 8406 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), 8407 VR128X:$src0)), 8408 (VCVTTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8409 def : Pat<(v2i64 (vselect_mask VK2WM:$mask, 8410 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), 8411 v2i64x_info.ImmAllZerosV)), 8412 (VCVTTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>; 8413} 8414 8415let Predicates = [HasVLX] in { 8416 def : Pat<(v2f64 (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))), 8417 (VCVTDQ2PDZ128rm addr:$src)>; 8418 def : Pat<(v2f64 (vselect_mask VK2WM:$mask, 8419 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))), 8420 VR128X:$src0)), 8421 (VCVTDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8422 def : Pat<(v2f64 (vselect_mask VK2WM:$mask, 8423 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))), 8424 v2f64x_info.ImmAllZerosV)), 8425 (VCVTDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>; 8426 8427 def : Pat<(v2f64 (X86any_VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))), 8428 (VCVTUDQ2PDZ128rm addr:$src)>; 8429 def : Pat<(v2f64 (vselect_mask VK2WM:$mask, 8430 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))), 8431 VR128X:$src0)), 8432 (VCVTUDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8433 def : Pat<(v2f64 (vselect_mask VK2WM:$mask, 8434 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))), 8435 v2f64x_info.ImmAllZerosV)), 8436 (VCVTUDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>; 8437} 8438 8439let Predicates = [HasDQI, HasVLX] in { 8440 // Special patterns to allow use of X86VMSintToFP for masking. Instruction 8441 // patterns have been disabled with null_frag. 8442 def : Pat<(v4f32 (X86any_VSintToFP (v2i64 VR128X:$src))), 8443 (VCVTQQ2PSZ128rr VR128X:$src)>; 8444 def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), (v4f32 VR128X:$src0), 8445 VK2WM:$mask), 8446 (VCVTQQ2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>; 8447 def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), v4f32x_info.ImmAllZerosV, 8448 VK2WM:$mask), 8449 (VCVTQQ2PSZ128rrkz VK2WM:$mask, VR128X:$src)>; 8450 8451 def : Pat<(v4f32 (X86any_VSintToFP (loadv2i64 addr:$src))), 8452 (VCVTQQ2PSZ128rm addr:$src)>; 8453 def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), (v4f32 VR128X:$src0), 8454 VK2WM:$mask), 8455 (VCVTQQ2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8456 def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), v4f32x_info.ImmAllZerosV, 8457 VK2WM:$mask), 8458 (VCVTQQ2PSZ128rmkz VK2WM:$mask, addr:$src)>; 8459 8460 def : Pat<(v4f32 (X86any_VSintToFP (v2i64 (X86VBroadcastld64 addr:$src)))), 8461 (VCVTQQ2PSZ128rmb addr:$src)>; 8462 def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)), 8463 (v4f32 VR128X:$src0), VK2WM:$mask), 8464 (VCVTQQ2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8465 def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)), 8466 v4f32x_info.ImmAllZerosV, VK2WM:$mask), 8467 (VCVTQQ2PSZ128rmbkz VK2WM:$mask, addr:$src)>; 8468 8469 // Special patterns to allow use of X86VMUintToFP for masking. Instruction 8470 // patterns have been disabled with null_frag. 8471 def : Pat<(v4f32 (X86any_VUintToFP (v2i64 VR128X:$src))), 8472 (VCVTUQQ2PSZ128rr VR128X:$src)>; 8473 def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), (v4f32 VR128X:$src0), 8474 VK2WM:$mask), 8475 (VCVTUQQ2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>; 8476 def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), v4f32x_info.ImmAllZerosV, 8477 VK2WM:$mask), 8478 (VCVTUQQ2PSZ128rrkz VK2WM:$mask, VR128X:$src)>; 8479 8480 def : Pat<(v4f32 (X86any_VUintToFP (loadv2i64 addr:$src))), 8481 (VCVTUQQ2PSZ128rm addr:$src)>; 8482 def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), (v4f32 VR128X:$src0), 8483 VK2WM:$mask), 8484 (VCVTUQQ2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8485 def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), v4f32x_info.ImmAllZerosV, 8486 VK2WM:$mask), 8487 (VCVTUQQ2PSZ128rmkz VK2WM:$mask, addr:$src)>; 8488 8489 def : Pat<(v4f32 (X86any_VUintToFP (v2i64 (X86VBroadcastld64 addr:$src)))), 8490 (VCVTUQQ2PSZ128rmb addr:$src)>; 8491 def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)), 8492 (v4f32 VR128X:$src0), VK2WM:$mask), 8493 (VCVTUQQ2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8494 def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)), 8495 v4f32x_info.ImmAllZerosV, VK2WM:$mask), 8496 (VCVTUQQ2PSZ128rmbkz VK2WM:$mask, addr:$src)>; 8497} 8498 8499//===----------------------------------------------------------------------===// 8500// Half precision conversion instructions 8501//===----------------------------------------------------------------------===// 8502 8503let Uses = [MXCSR], mayRaiseFPException = 1 in 8504multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src, 8505 X86MemOperand x86memop, dag ld_dag, 8506 X86FoldableSchedWrite sched> { 8507 defm rr : AVX512_maskable_split<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst), 8508 (ins _src.RC:$src), "vcvtph2ps", "$src", "$src", 8509 (X86any_cvtph2ps (_src.VT _src.RC:$src)), 8510 (X86cvtph2ps (_src.VT _src.RC:$src))>, 8511 T8PD, Sched<[sched]>; 8512 defm rm : AVX512_maskable_split<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst), 8513 (ins x86memop:$src), "vcvtph2ps", "$src", "$src", 8514 (X86any_cvtph2ps (_src.VT ld_dag)), 8515 (X86cvtph2ps (_src.VT ld_dag))>, 8516 T8PD, Sched<[sched.Folded]>; 8517} 8518 8519multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src, 8520 X86FoldableSchedWrite sched> { 8521 let Uses = [MXCSR] in 8522 defm rrb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst), 8523 (ins _src.RC:$src), "vcvtph2ps", 8524 "{sae}, $src", "$src, {sae}", 8525 (X86cvtph2psSAE (_src.VT _src.RC:$src))>, 8526 T8PD, EVEX_B, Sched<[sched]>; 8527} 8528 8529let Predicates = [HasAVX512] in 8530 defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem, 8531 (load addr:$src), WriteCvtPH2PSZ>, 8532 avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSZ>, 8533 EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>; 8534 8535let Predicates = [HasVLX] in { 8536 defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem, 8537 (load addr:$src), WriteCvtPH2PSY>, EVEX, EVEX_V256, 8538 EVEX_CD8<32, CD8VH>; 8539 defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem, 8540 (bitconvert (v2i64 (X86vzload64 addr:$src))), 8541 WriteCvtPH2PS>, EVEX, EVEX_V128, 8542 EVEX_CD8<32, CD8VH>; 8543 8544 // Pattern match vcvtph2ps of a scalar i64 load. 8545 def : Pat<(v4f32 (X86any_cvtph2ps (v8i16 (bitconvert 8546 (v2i64 (scalar_to_vector (loadi64 addr:$src))))))), 8547 (VCVTPH2PSZ128rm addr:$src)>; 8548} 8549 8550multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src, 8551 X86MemOperand x86memop, SchedWrite RR, SchedWrite MR> { 8552let ExeDomain = GenericDomain, Uses = [MXCSR], mayRaiseFPException = 1 in { 8553 def rr : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst), 8554 (ins _src.RC:$src1, i32u8imm:$src2), 8555 "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", 8556 [(set _dest.RC:$dst, 8557 (X86any_cvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>, 8558 Sched<[RR]>; 8559 let Constraints = "$src0 = $dst" in 8560 def rrk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst), 8561 (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2), 8562 "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", 8563 [(set _dest.RC:$dst, 8564 (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2), 8565 _dest.RC:$src0, _src.KRCWM:$mask))]>, 8566 Sched<[RR]>, EVEX_K; 8567 def rrkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst), 8568 (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2), 8569 "vcvtps2ph\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}", 8570 [(set _dest.RC:$dst, 8571 (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2), 8572 _dest.ImmAllZerosV, _src.KRCWM:$mask))]>, 8573 Sched<[RR]>, EVEX_KZ; 8574 let hasSideEffects = 0, mayStore = 1 in { 8575 def mr : AVX512AIi8<0x1D, MRMDestMem, (outs), 8576 (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2), 8577 "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, 8578 Sched<[MR]>; 8579 def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs), 8580 (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2), 8581 "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", []>, 8582 EVEX_K, Sched<[MR]>, NotMemoryFoldable; 8583 } 8584} 8585} 8586 8587multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src, 8588 SchedWrite Sched> { 8589 let hasSideEffects = 0, Uses = [MXCSR] in 8590 defm rrb : AVX512_maskable_in_asm<0x1D, MRMDestReg, _dest, 8591 (outs _dest.RC:$dst), 8592 (ins _src.RC:$src1, i32u8imm:$src2), 8593 "vcvtps2ph", "$src2, {sae}, $src1", "$src1, {sae}, $src2", []>, 8594 EVEX_B, AVX512AIi8Base, Sched<[Sched]>; 8595} 8596 8597let Predicates = [HasAVX512] in { 8598 defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem, 8599 WriteCvtPS2PHZ, WriteCvtPS2PHZSt>, 8600 avx512_cvtps2ph_sae<v16i16x_info, v16f32_info, WriteCvtPS2PHZ>, 8601 EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>; 8602 8603 def : Pat<(store (v16i16 (X86any_cvtps2ph VR512:$src1, timm:$src2)), addr:$dst), 8604 (VCVTPS2PHZmr addr:$dst, VR512:$src1, timm:$src2)>; 8605} 8606 8607let Predicates = [HasVLX] in { 8608 defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem, 8609 WriteCvtPS2PHY, WriteCvtPS2PHYSt>, 8610 EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>; 8611 defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem, 8612 WriteCvtPS2PH, WriteCvtPS2PHSt>, 8613 EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>; 8614 8615 def : Pat<(store (f64 (extractelt 8616 (bc_v2f64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))), 8617 (iPTR 0))), addr:$dst), 8618 (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>; 8619 def : Pat<(store (i64 (extractelt 8620 (bc_v2i64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))), 8621 (iPTR 0))), addr:$dst), 8622 (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>; 8623 def : Pat<(store (v8i16 (X86any_cvtps2ph VR256X:$src1, timm:$src2)), addr:$dst), 8624 (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, timm:$src2)>; 8625} 8626 8627// Unordered/Ordered scalar fp compare with Sae and set EFLAGS 8628multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _, 8629 string OpcodeStr, Domain d, 8630 X86FoldableSchedWrite sched = WriteFComX> { 8631 let hasSideEffects = 0, Uses = [MXCSR] in 8632 def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2), 8633 !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"), []>, 8634 EVEX, EVEX_B, VEX_LIG, EVEX_V128, Sched<[sched]>; 8635} 8636 8637let Defs = [EFLAGS], Predicates = [HasAVX512] in { 8638 defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", SSEPackedSingle>, 8639 AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>; 8640 defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", SSEPackedDouble>, 8641 AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>; 8642 defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", SSEPackedSingle>, 8643 AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>; 8644 defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", SSEPackedDouble>, 8645 AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>; 8646} 8647 8648let Defs = [EFLAGS], Predicates = [HasAVX512] in { 8649 defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86any_fcmp, f32, f32mem, loadf32, 8650 "ucomiss", SSEPackedSingle>, PS, EVEX, VEX_LIG, 8651 EVEX_CD8<32, CD8VT1>; 8652 defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86any_fcmp, f64, f64mem, loadf64, 8653 "ucomisd", SSEPackedDouble>, PD, EVEX, 8654 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; 8655 defm VCOMISSZ : sse12_ord_cmp<0x2F, FR32X, X86strict_fcmps, f32, f32mem, loadf32, 8656 "comiss", SSEPackedSingle>, PS, EVEX, VEX_LIG, 8657 EVEX_CD8<32, CD8VT1>; 8658 defm VCOMISDZ : sse12_ord_cmp<0x2F, FR64X, X86strict_fcmps, f64, f64mem, loadf64, 8659 "comisd", SSEPackedDouble>, PD, EVEX, 8660 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; 8661 let isCodeGenOnly = 1 in { 8662 defm VUCOMISSZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem, 8663 sse_load_f32, "ucomiss", SSEPackedSingle>, PS, EVEX, VEX_LIG, 8664 EVEX_CD8<32, CD8VT1>; 8665 defm VUCOMISDZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem, 8666 sse_load_f64, "ucomisd", SSEPackedDouble>, PD, EVEX, 8667 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; 8668 8669 defm VCOMISSZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem, 8670 sse_load_f32, "comiss", SSEPackedSingle>, PS, EVEX, VEX_LIG, 8671 EVEX_CD8<32, CD8VT1>; 8672 defm VCOMISDZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem, 8673 sse_load_f64, "comisd", SSEPackedDouble>, PD, EVEX, 8674 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; 8675 } 8676} 8677 8678/// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd 8679multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode, 8680 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 8681 let Predicates = [HasAVX512], ExeDomain = _.ExeDomain, Uses = [MXCSR] in { 8682 defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 8683 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 8684 "$src2, $src1", "$src1, $src2", 8685 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>, 8686 EVEX_4V, VEX_LIG, Sched<[sched]>; 8687 defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), 8688 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr, 8689 "$src2, $src1", "$src1, $src2", 8690 (OpNode (_.VT _.RC:$src1), 8691 (_.ScalarIntMemFrags addr:$src2))>, EVEX_4V, VEX_LIG, 8692 Sched<[sched.Folded, sched.ReadAfterFold]>; 8693} 8694} 8695 8696defm VRCP14SSZ : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SchedWriteFRcp.Scl, 8697 f32x_info>, EVEX_CD8<32, CD8VT1>, 8698 T8PD; 8699defm VRCP14SDZ : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SchedWriteFRcp.Scl, 8700 f64x_info>, VEX_W, EVEX_CD8<64, CD8VT1>, 8701 T8PD; 8702defm VRSQRT14SSZ : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s, 8703 SchedWriteFRsqrt.Scl, f32x_info>, 8704 EVEX_CD8<32, CD8VT1>, T8PD; 8705defm VRSQRT14SDZ : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s, 8706 SchedWriteFRsqrt.Scl, f64x_info>, VEX_W, 8707 EVEX_CD8<64, CD8VT1>, T8PD; 8708 8709/// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd 8710multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode, 8711 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 8712 let ExeDomain = _.ExeDomain in { 8713 defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 8714 (ins _.RC:$src), OpcodeStr, "$src", "$src", 8715 (_.VT (OpNode _.RC:$src))>, EVEX, T8PD, 8716 Sched<[sched]>; 8717 defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 8718 (ins _.MemOp:$src), OpcodeStr, "$src", "$src", 8719 (OpNode (_.VT 8720 (bitconvert (_.LdFrag addr:$src))))>, EVEX, T8PD, 8721 Sched<[sched.Folded, sched.ReadAfterFold]>; 8722 defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 8723 (ins _.ScalarMemOp:$src), OpcodeStr, 8724 "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr, 8725 (OpNode (_.VT 8726 (_.BroadcastLdFrag addr:$src)))>, 8727 EVEX, T8PD, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 8728 } 8729} 8730 8731let Uses = [MXCSR] in 8732multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode, 8733 X86SchedWriteWidths sched> { 8734 defm PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"), OpNode, sched.ZMM, 8735 v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>; 8736 defm PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"), OpNode, sched.ZMM, 8737 v8f64_info>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; 8738 8739 // Define only if AVX512VL feature is present. 8740 let Predicates = [HasVLX] in { 8741 defm PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"), 8742 OpNode, sched.XMM, v4f32x_info>, 8743 EVEX_V128, EVEX_CD8<32, CD8VF>; 8744 defm PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"), 8745 OpNode, sched.YMM, v8f32x_info>, 8746 EVEX_V256, EVEX_CD8<32, CD8VF>; 8747 defm PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"), 8748 OpNode, sched.XMM, v2f64x_info>, 8749 EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>; 8750 defm PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"), 8751 OpNode, sched.YMM, v4f64x_info>, 8752 EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>; 8753 } 8754} 8755 8756defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86rsqrt14, SchedWriteFRsqrt>; 8757defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86rcp14, SchedWriteFRcp>; 8758 8759/// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd 8760multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _, 8761 SDNode OpNode, SDNode OpNodeSAE, 8762 X86FoldableSchedWrite sched> { 8763 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in { 8764 defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 8765 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 8766 "$src2, $src1", "$src1, $src2", 8767 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>, 8768 Sched<[sched]>, SIMD_EXC; 8769 8770 defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 8771 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 8772 "{sae}, $src2, $src1", "$src1, $src2, {sae}", 8773 (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2))>, 8774 EVEX_B, Sched<[sched]>; 8775 8776 defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), 8777 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr, 8778 "$src2, $src1", "$src1, $src2", 8779 (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2))>, 8780 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; 8781 } 8782} 8783 8784multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode, 8785 SDNode OpNodeSAE, X86FoldableSchedWrite sched> { 8786 defm SSZ : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, OpNodeSAE, 8787 sched>, EVEX_CD8<32, CD8VT1>, VEX_LIG; 8788 defm SDZ : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, OpNodeSAE, 8789 sched>, EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W; 8790} 8791 8792let Predicates = [HasERI] in { 8793 defm VRCP28 : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, X86rcp28SAEs, 8794 SchedWriteFRcp.Scl>, T8PD, EVEX_4V; 8795 defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, X86rsqrt28SAEs, 8796 SchedWriteFRsqrt.Scl>, T8PD, EVEX_4V; 8797} 8798 8799defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs, 8800 SchedWriteFRnd.Scl>, T8PD, EVEX_4V; 8801/// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd 8802 8803multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, 8804 SDNode OpNode, X86FoldableSchedWrite sched> { 8805 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in { 8806 defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 8807 (ins _.RC:$src), OpcodeStr, "$src", "$src", 8808 (OpNode (_.VT _.RC:$src))>, 8809 Sched<[sched]>; 8810 8811 defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 8812 (ins _.MemOp:$src), OpcodeStr, "$src", "$src", 8813 (OpNode (_.VT 8814 (bitconvert (_.LdFrag addr:$src))))>, 8815 Sched<[sched.Folded, sched.ReadAfterFold]>; 8816 8817 defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 8818 (ins _.ScalarMemOp:$src), OpcodeStr, 8819 "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr, 8820 (OpNode (_.VT 8821 (_.BroadcastLdFrag addr:$src)))>, 8822 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 8823 } 8824} 8825multiclass avx512_fp28_p_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, 8826 SDNode OpNode, X86FoldableSchedWrite sched> { 8827 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in 8828 defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 8829 (ins _.RC:$src), OpcodeStr, 8830 "{sae}, $src", "$src, {sae}", 8831 (OpNode (_.VT _.RC:$src))>, 8832 EVEX_B, Sched<[sched]>; 8833} 8834 8835multiclass avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode, 8836 SDNode OpNodeSAE, X86SchedWriteWidths sched> { 8837 defm PSZ : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>, 8838 avx512_fp28_p_sae<opc, OpcodeStr#"ps", v16f32_info, OpNodeSAE, sched.ZMM>, 8839 T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>; 8840 defm PDZ : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>, 8841 avx512_fp28_p_sae<opc, OpcodeStr#"pd", v8f64_info, OpNodeSAE, sched.ZMM>, 8842 T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; 8843} 8844 8845multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr, 8846 SDNode OpNode, X86SchedWriteWidths sched> { 8847 // Define only if AVX512VL feature is present. 8848 let Predicates = [HasVLX] in { 8849 defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode, 8850 sched.XMM>, 8851 EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>; 8852 defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode, 8853 sched.YMM>, 8854 EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>; 8855 defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode, 8856 sched.XMM>, 8857 EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>; 8858 defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode, 8859 sched.YMM>, 8860 EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>; 8861 } 8862} 8863 8864let Predicates = [HasERI] in { 8865 defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, X86rsqrt28SAE, 8866 SchedWriteFRsqrt>, EVEX; 8867 defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28, X86rcp28SAE, 8868 SchedWriteFRcp>, EVEX; 8869 defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2, X86exp2SAE, 8870 SchedWriteFAdd>, EVEX; 8871} 8872defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE, 8873 SchedWriteFRnd>, 8874 avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexp, 8875 SchedWriteFRnd>, EVEX; 8876 8877multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr, 8878 X86FoldableSchedWrite sched, X86VectorVTInfo _>{ 8879 let ExeDomain = _.ExeDomain in 8880 defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 8881 (ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc", 8882 (_.VT (X86fsqrtRnd _.RC:$src, (i32 timm:$rc)))>, 8883 EVEX, EVEX_B, EVEX_RC, Sched<[sched]>; 8884} 8885 8886multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr, 8887 X86FoldableSchedWrite sched, X86VectorVTInfo _>{ 8888 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in { 8889 defm r: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst), 8890 (ins _.RC:$src), OpcodeStr, "$src", "$src", 8891 (_.VT (any_fsqrt _.RC:$src)), 8892 (_.VT (fsqrt _.RC:$src))>, EVEX, 8893 Sched<[sched]>; 8894 defm m: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst), 8895 (ins _.MemOp:$src), OpcodeStr, "$src", "$src", 8896 (any_fsqrt (_.VT (_.LdFrag addr:$src))), 8897 (fsqrt (_.VT (_.LdFrag addr:$src)))>, EVEX, 8898 Sched<[sched.Folded, sched.ReadAfterFold]>; 8899 defm mb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst), 8900 (ins _.ScalarMemOp:$src), OpcodeStr, 8901 "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr, 8902 (any_fsqrt (_.VT (_.BroadcastLdFrag addr:$src))), 8903 (fsqrt (_.VT (_.BroadcastLdFrag addr:$src)))>, 8904 EVEX, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 8905 } 8906} 8907 8908let Uses = [MXCSR], mayRaiseFPException = 1 in 8909multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr, 8910 X86SchedWriteSizes sched> { 8911 defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"), 8912 sched.PS.ZMM, v16f32_info>, 8913 EVEX_V512, PS, EVEX_CD8<32, CD8VF>; 8914 defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"), 8915 sched.PD.ZMM, v8f64_info>, 8916 EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>; 8917 // Define only if AVX512VL feature is present. 8918 let Predicates = [HasVLX] in { 8919 defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"), 8920 sched.PS.XMM, v4f32x_info>, 8921 EVEX_V128, PS, EVEX_CD8<32, CD8VF>; 8922 defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"), 8923 sched.PS.YMM, v8f32x_info>, 8924 EVEX_V256, PS, EVEX_CD8<32, CD8VF>; 8925 defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"), 8926 sched.PD.XMM, v2f64x_info>, 8927 EVEX_V128, VEX_W, PD, EVEX_CD8<64, CD8VF>; 8928 defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"), 8929 sched.PD.YMM, v4f64x_info>, 8930 EVEX_V256, VEX_W, PD, EVEX_CD8<64, CD8VF>; 8931 } 8932} 8933 8934let Uses = [MXCSR] in 8935multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr, 8936 X86SchedWriteSizes sched> { 8937 defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"), 8938 sched.PS.ZMM, v16f32_info>, 8939 EVEX_V512, PS, EVEX_CD8<32, CD8VF>; 8940 defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"), 8941 sched.PD.ZMM, v8f64_info>, 8942 EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>; 8943} 8944 8945multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched, 8946 X86VectorVTInfo _, string Name> { 8947 let ExeDomain = _.ExeDomain in { 8948 defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 8949 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 8950 "$src2, $src1", "$src1, $src2", 8951 (X86fsqrts (_.VT _.RC:$src1), 8952 (_.VT _.RC:$src2))>, 8953 Sched<[sched]>, SIMD_EXC; 8954 defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), 8955 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr, 8956 "$src2, $src1", "$src1, $src2", 8957 (X86fsqrts (_.VT _.RC:$src1), 8958 (_.ScalarIntMemFrags addr:$src2))>, 8959 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; 8960 let Uses = [MXCSR] in 8961 defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 8962 (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr, 8963 "$rc, $src2, $src1", "$src1, $src2, $rc", 8964 (X86fsqrtRnds (_.VT _.RC:$src1), 8965 (_.VT _.RC:$src2), 8966 (i32 timm:$rc))>, 8967 EVEX_B, EVEX_RC, Sched<[sched]>; 8968 8969 let isCodeGenOnly = 1, hasSideEffects = 0, Predicates=[HasAVX512] in { 8970 def r : I<opc, MRMSrcReg, (outs _.FRC:$dst), 8971 (ins _.FRC:$src1, _.FRC:$src2), 8972 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, 8973 Sched<[sched]>, SIMD_EXC; 8974 let mayLoad = 1 in 8975 def m : I<opc, MRMSrcMem, (outs _.FRC:$dst), 8976 (ins _.FRC:$src1, _.ScalarMemOp:$src2), 8977 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, 8978 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; 8979 } 8980 } 8981 8982 let Predicates = [HasAVX512] in { 8983 def : Pat<(_.EltVT (any_fsqrt _.FRC:$src)), 8984 (!cast<Instruction>(Name#Zr) 8985 (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>; 8986 } 8987 8988 let Predicates = [HasAVX512, OptForSize] in { 8989 def : Pat<(_.EltVT (any_fsqrt (load addr:$src))), 8990 (!cast<Instruction>(Name#Zm) 8991 (_.EltVT (IMPLICIT_DEF)), addr:$src)>; 8992 } 8993} 8994 8995multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr, 8996 X86SchedWriteSizes sched> { 8997 defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", sched.PS.Scl, f32x_info, NAME#"SS">, 8998 EVEX_CD8<32, CD8VT1>, EVEX_4V, XS; 8999 defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", sched.PD.Scl, f64x_info, NAME#"SD">, 9000 EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W; 9001} 9002 9003defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, 9004 avx512_sqrt_packed_all_round<0x51, "vsqrt", SchedWriteFSqrtSizes>; 9005 9006defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, VEX_LIG; 9007 9008multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr, 9009 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 9010 let ExeDomain = _.ExeDomain in { 9011 defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 9012 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr, 9013 "$src3, $src2, $src1", "$src1, $src2, $src3", 9014 (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2), 9015 (i32 timm:$src3)))>, 9016 Sched<[sched]>, SIMD_EXC; 9017 9018 let Uses = [MXCSR] in 9019 defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 9020 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr, 9021 "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3", 9022 (_.VT (X86RndScalesSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2), 9023 (i32 timm:$src3)))>, EVEX_B, 9024 Sched<[sched]>; 9025 9026 defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), 9027 (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3), 9028 OpcodeStr, 9029 "$src3, $src2, $src1", "$src1, $src2, $src3", 9030 (_.VT (X86RndScales _.RC:$src1, 9031 (_.ScalarIntMemFrags addr:$src2), (i32 timm:$src3)))>, 9032 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; 9033 9034 let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in { 9035 def r : I<opc, MRMSrcReg, (outs _.FRC:$dst), 9036 (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3), 9037 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 9038 []>, Sched<[sched]>, SIMD_EXC; 9039 9040 let mayLoad = 1 in 9041 def m : I<opc, MRMSrcMem, (outs _.FRC:$dst), 9042 (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3), 9043 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 9044 []>, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; 9045 } 9046 } 9047 9048 let Predicates = [HasAVX512] in { 9049 def : Pat<(X86any_VRndScale _.FRC:$src1, timm:$src2), 9050 (_.EltVT (!cast<Instruction>(NAME#r) (_.EltVT (IMPLICIT_DEF)), 9051 _.FRC:$src1, timm:$src2))>; 9052 } 9053 9054 let Predicates = [HasAVX512, OptForSize] in { 9055 def : Pat<(X86any_VRndScale (_.ScalarLdFrag addr:$src1), timm:$src2), 9056 (_.EltVT (!cast<Instruction>(NAME#m) (_.EltVT (IMPLICIT_DEF)), 9057 addr:$src1, timm:$src2))>; 9058 } 9059} 9060 9061defm VRNDSCALESSZ : avx512_rndscale_scalar<0x0A, "vrndscaless", 9062 SchedWriteFRnd.Scl, f32x_info>, 9063 AVX512AIi8Base, EVEX_4V, VEX_LIG, 9064 EVEX_CD8<32, CD8VT1>; 9065 9066defm VRNDSCALESDZ : avx512_rndscale_scalar<0x0B, "vrndscalesd", 9067 SchedWriteFRnd.Scl, f64x_info>, 9068 VEX_W, AVX512AIi8Base, EVEX_4V, VEX_LIG, 9069 EVEX_CD8<64, CD8VT1>; 9070 9071multiclass avx512_masked_scalar<SDNode OpNode, string OpcPrefix, SDNode Move, 9072 dag Mask, X86VectorVTInfo _, PatLeaf ZeroFP, 9073 dag OutMask, Predicate BasePredicate> { 9074 let Predicates = [BasePredicate] in { 9075 def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask, 9076 (OpNode (extractelt _.VT:$src2, (iPTR 0))), 9077 (extractelt _.VT:$dst, (iPTR 0))))), 9078 (!cast<Instruction>("V"#OpcPrefix#r_Intk) 9079 _.VT:$dst, OutMask, _.VT:$src2, _.VT:$src1)>; 9080 9081 def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask, 9082 (OpNode (extractelt _.VT:$src2, (iPTR 0))), 9083 ZeroFP))), 9084 (!cast<Instruction>("V"#OpcPrefix#r_Intkz) 9085 OutMask, _.VT:$src2, _.VT:$src1)>; 9086 } 9087} 9088 9089defm : avx512_masked_scalar<fsqrt, "SQRTSSZ", X86Movss, 9090 (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v4f32x_info, 9091 fp32imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>; 9092defm : avx512_masked_scalar<fsqrt, "SQRTSDZ", X86Movsd, 9093 (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v2f64x_info, 9094 fp64imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>; 9095 9096 9097//------------------------------------------------- 9098// Integer truncate and extend operations 9099//------------------------------------------------- 9100 9101// PatFrags that contain a select and a truncate op. The take operands in the 9102// same order as X86vmtrunc, X86vmtruncs, X86vmtruncus. This allows us to pass 9103// either to the multiclasses. 9104def select_trunc : PatFrag<(ops node:$src, node:$src0, node:$mask), 9105 (vselect_mask node:$mask, 9106 (trunc node:$src), node:$src0)>; 9107def select_truncs : PatFrag<(ops node:$src, node:$src0, node:$mask), 9108 (vselect_mask node:$mask, 9109 (X86vtruncs node:$src), node:$src0)>; 9110def select_truncus : PatFrag<(ops node:$src, node:$src0, node:$mask), 9111 (vselect_mask node:$mask, 9112 (X86vtruncus node:$src), node:$src0)>; 9113 9114multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode, 9115 SDPatternOperator MaskNode, 9116 X86FoldableSchedWrite sched, X86VectorVTInfo SrcInfo, 9117 X86VectorVTInfo DestInfo, X86MemOperand x86memop> { 9118 let ExeDomain = DestInfo.ExeDomain in { 9119 def rr : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst), 9120 (ins SrcInfo.RC:$src), 9121 OpcodeStr # "\t{$src, $dst|$dst, $src}", 9122 [(set DestInfo.RC:$dst, 9123 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src))))]>, 9124 EVEX, Sched<[sched]>; 9125 let Constraints = "$src0 = $dst" in 9126 def rrk : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst), 9127 (ins DestInfo.RC:$src0, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src), 9128 OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", 9129 [(set DestInfo.RC:$dst, 9130 (MaskNode (SrcInfo.VT SrcInfo.RC:$src), 9131 (DestInfo.VT DestInfo.RC:$src0), 9132 SrcInfo.KRCWM:$mask))]>, 9133 EVEX, EVEX_K, Sched<[sched]>; 9134 def rrkz : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst), 9135 (ins SrcInfo.KRCWM:$mask, SrcInfo.RC:$src), 9136 OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}", 9137 [(set DestInfo.RC:$dst, 9138 (DestInfo.VT (MaskNode (SrcInfo.VT SrcInfo.RC:$src), 9139 DestInfo.ImmAllZerosV, SrcInfo.KRCWM:$mask)))]>, 9140 EVEX, EVEX_KZ, Sched<[sched]>; 9141 } 9142 9143 let mayStore = 1, hasSideEffects = 0, ExeDomain = DestInfo.ExeDomain in { 9144 def mr : AVX512XS8I<opc, MRMDestMem, (outs), 9145 (ins x86memop:$dst, SrcInfo.RC:$src), 9146 OpcodeStr # "\t{$src, $dst|$dst, $src}", []>, 9147 EVEX, Sched<[sched.Folded]>; 9148 9149 def mrk : AVX512XS8I<opc, MRMDestMem, (outs), 9150 (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src), 9151 OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", []>, 9152 EVEX, EVEX_K, Sched<[sched.Folded]>, NotMemoryFoldable; 9153 }//mayStore = 1, hasSideEffects = 0 9154} 9155 9156multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo, 9157 X86VectorVTInfo DestInfo, 9158 PatFrag truncFrag, PatFrag mtruncFrag, 9159 string Name> { 9160 9161 def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst), 9162 (!cast<Instruction>(Name#SrcInfo.ZSuffix#mr) 9163 addr:$dst, SrcInfo.RC:$src)>; 9164 9165 def : Pat<(mtruncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst, 9166 SrcInfo.KRCWM:$mask), 9167 (!cast<Instruction>(Name#SrcInfo.ZSuffix#mrk) 9168 addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>; 9169} 9170 9171multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128, 9172 SDNode OpNode256, SDNode OpNode512, 9173 SDPatternOperator MaskNode128, 9174 SDPatternOperator MaskNode256, 9175 SDPatternOperator MaskNode512, 9176 X86FoldableSchedWrite sched, 9177 AVX512VLVectorVTInfo VTSrcInfo, 9178 X86VectorVTInfo DestInfoZ128, 9179 X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ, 9180 X86MemOperand x86memopZ128, X86MemOperand x86memopZ256, 9181 X86MemOperand x86memopZ, PatFrag truncFrag, 9182 PatFrag mtruncFrag, Predicate prd = HasAVX512>{ 9183 9184 let Predicates = [HasVLX, prd] in { 9185 defm Z128: avx512_trunc_common<opc, OpcodeStr, OpNode128, MaskNode128, sched, 9186 VTSrcInfo.info128, DestInfoZ128, x86memopZ128>, 9187 avx512_trunc_mr_lowering<VTSrcInfo.info128, DestInfoZ128, 9188 truncFrag, mtruncFrag, NAME>, EVEX_V128; 9189 9190 defm Z256: avx512_trunc_common<opc, OpcodeStr, OpNode256, MaskNode256, sched, 9191 VTSrcInfo.info256, DestInfoZ256, x86memopZ256>, 9192 avx512_trunc_mr_lowering<VTSrcInfo.info256, DestInfoZ256, 9193 truncFrag, mtruncFrag, NAME>, EVEX_V256; 9194 } 9195 let Predicates = [prd] in 9196 defm Z: avx512_trunc_common<opc, OpcodeStr, OpNode512, MaskNode512, sched, 9197 VTSrcInfo.info512, DestInfoZ, x86memopZ>, 9198 avx512_trunc_mr_lowering<VTSrcInfo.info512, DestInfoZ, 9199 truncFrag, mtruncFrag, NAME>, EVEX_V512; 9200} 9201 9202multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr, SDNode OpNode, 9203 SDPatternOperator MaskNode, 9204 X86FoldableSchedWrite sched, PatFrag StoreNode, 9205 PatFrag MaskedStoreNode, SDNode InVecNode, 9206 SDPatternOperator InVecMaskNode> { 9207 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, InVecNode, 9208 InVecMaskNode, InVecMaskNode, InVecMaskNode, sched, 9209 avx512vl_i64_info, v16i8x_info, v16i8x_info, 9210 v16i8x_info, i16mem, i32mem, i64mem, StoreNode, 9211 MaskedStoreNode>, EVEX_CD8<8, CD8VO>; 9212} 9213 9214multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode, 9215 SDPatternOperator MaskNode, 9216 X86FoldableSchedWrite sched, PatFrag StoreNode, 9217 PatFrag MaskedStoreNode, SDNode InVecNode, 9218 SDPatternOperator InVecMaskNode> { 9219 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode, 9220 InVecMaskNode, InVecMaskNode, MaskNode, sched, 9221 avx512vl_i64_info, v8i16x_info, v8i16x_info, 9222 v8i16x_info, i32mem, i64mem, i128mem, StoreNode, 9223 MaskedStoreNode>, EVEX_CD8<16, CD8VQ>; 9224} 9225 9226multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode, 9227 SDPatternOperator MaskNode, 9228 X86FoldableSchedWrite sched, PatFrag StoreNode, 9229 PatFrag MaskedStoreNode, SDNode InVecNode, 9230 SDPatternOperator InVecMaskNode> { 9231 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode, 9232 InVecMaskNode, MaskNode, MaskNode, sched, 9233 avx512vl_i64_info, v4i32x_info, v4i32x_info, 9234 v8i32x_info, i64mem, i128mem, i256mem, StoreNode, 9235 MaskedStoreNode>, EVEX_CD8<32, CD8VH>; 9236} 9237 9238multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode, 9239 SDPatternOperator MaskNode, 9240 X86FoldableSchedWrite sched, PatFrag StoreNode, 9241 PatFrag MaskedStoreNode, SDNode InVecNode, 9242 SDPatternOperator InVecMaskNode> { 9243 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode, 9244 InVecMaskNode, InVecMaskNode, MaskNode, sched, 9245 avx512vl_i32_info, v16i8x_info, v16i8x_info, 9246 v16i8x_info, i32mem, i64mem, i128mem, StoreNode, 9247 MaskedStoreNode>, EVEX_CD8<8, CD8VQ>; 9248} 9249 9250multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode, 9251 SDPatternOperator MaskNode, 9252 X86FoldableSchedWrite sched, PatFrag StoreNode, 9253 PatFrag MaskedStoreNode, SDNode InVecNode, 9254 SDPatternOperator InVecMaskNode> { 9255 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode, 9256 InVecMaskNode, MaskNode, MaskNode, sched, 9257 avx512vl_i32_info, v8i16x_info, v8i16x_info, 9258 v16i16x_info, i64mem, i128mem, i256mem, StoreNode, 9259 MaskedStoreNode>, EVEX_CD8<16, CD8VH>; 9260} 9261 9262multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode, 9263 SDPatternOperator MaskNode, 9264 X86FoldableSchedWrite sched, PatFrag StoreNode, 9265 PatFrag MaskedStoreNode, SDNode InVecNode, 9266 SDPatternOperator InVecMaskNode> { 9267 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode, 9268 InVecMaskNode, MaskNode, MaskNode, sched, 9269 avx512vl_i16_info, v16i8x_info, v16i8x_info, 9270 v32i8x_info, i64mem, i128mem, i256mem, StoreNode, 9271 MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>; 9272} 9273 9274defm VPMOVQB : avx512_trunc_qb<0x32, "vpmovqb", trunc, select_trunc, 9275 WriteShuffle256, truncstorevi8, 9276 masked_truncstorevi8, X86vtrunc, X86vmtrunc>; 9277defm VPMOVSQB : avx512_trunc_qb<0x22, "vpmovsqb", X86vtruncs, select_truncs, 9278 WriteShuffle256, truncstore_s_vi8, 9279 masked_truncstore_s_vi8, X86vtruncs, 9280 X86vmtruncs>; 9281defm VPMOVUSQB : avx512_trunc_qb<0x12, "vpmovusqb", X86vtruncus, 9282 select_truncus, WriteShuffle256, 9283 truncstore_us_vi8, masked_truncstore_us_vi8, 9284 X86vtruncus, X86vmtruncus>; 9285 9286defm VPMOVQW : avx512_trunc_qw<0x34, "vpmovqw", trunc, select_trunc, 9287 WriteShuffle256, truncstorevi16, 9288 masked_truncstorevi16, X86vtrunc, X86vmtrunc>; 9289defm VPMOVSQW : avx512_trunc_qw<0x24, "vpmovsqw", X86vtruncs, select_truncs, 9290 WriteShuffle256, truncstore_s_vi16, 9291 masked_truncstore_s_vi16, X86vtruncs, 9292 X86vmtruncs>; 9293defm VPMOVUSQW : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus, 9294 select_truncus, WriteShuffle256, 9295 truncstore_us_vi16, masked_truncstore_us_vi16, 9296 X86vtruncus, X86vmtruncus>; 9297 9298defm VPMOVQD : avx512_trunc_qd<0x35, "vpmovqd", trunc, select_trunc, 9299 WriteShuffle256, truncstorevi32, 9300 masked_truncstorevi32, X86vtrunc, X86vmtrunc>; 9301defm VPMOVSQD : avx512_trunc_qd<0x25, "vpmovsqd", X86vtruncs, select_truncs, 9302 WriteShuffle256, truncstore_s_vi32, 9303 masked_truncstore_s_vi32, X86vtruncs, 9304 X86vmtruncs>; 9305defm VPMOVUSQD : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus, 9306 select_truncus, WriteShuffle256, 9307 truncstore_us_vi32, masked_truncstore_us_vi32, 9308 X86vtruncus, X86vmtruncus>; 9309 9310defm VPMOVDB : avx512_trunc_db<0x31, "vpmovdb", trunc, select_trunc, 9311 WriteShuffle256, truncstorevi8, 9312 masked_truncstorevi8, X86vtrunc, X86vmtrunc>; 9313defm VPMOVSDB : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, select_truncs, 9314 WriteShuffle256, truncstore_s_vi8, 9315 masked_truncstore_s_vi8, X86vtruncs, 9316 X86vmtruncs>; 9317defm VPMOVUSDB : avx512_trunc_db<0x11, "vpmovusdb", X86vtruncus, 9318 select_truncus, WriteShuffle256, 9319 truncstore_us_vi8, masked_truncstore_us_vi8, 9320 X86vtruncus, X86vmtruncus>; 9321 9322defm VPMOVDW : avx512_trunc_dw<0x33, "vpmovdw", trunc, select_trunc, 9323 WriteShuffle256, truncstorevi16, 9324 masked_truncstorevi16, X86vtrunc, X86vmtrunc>; 9325defm VPMOVSDW : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, select_truncs, 9326 WriteShuffle256, truncstore_s_vi16, 9327 masked_truncstore_s_vi16, X86vtruncs, 9328 X86vmtruncs>; 9329defm VPMOVUSDW : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus, 9330 select_truncus, WriteShuffle256, 9331 truncstore_us_vi16, masked_truncstore_us_vi16, 9332 X86vtruncus, X86vmtruncus>; 9333 9334defm VPMOVWB : avx512_trunc_wb<0x30, "vpmovwb", trunc, select_trunc, 9335 WriteShuffle256, truncstorevi8, 9336 masked_truncstorevi8, X86vtrunc, 9337 X86vmtrunc>; 9338defm VPMOVSWB : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, select_truncs, 9339 WriteShuffle256, truncstore_s_vi8, 9340 masked_truncstore_s_vi8, X86vtruncs, 9341 X86vmtruncs>; 9342defm VPMOVUSWB : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus, 9343 select_truncus, WriteShuffle256, 9344 truncstore_us_vi8, masked_truncstore_us_vi8, 9345 X86vtruncus, X86vmtruncus>; 9346 9347let Predicates = [HasAVX512, NoVLX] in { 9348def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))), 9349 (v8i16 (EXTRACT_SUBREG 9350 (v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), 9351 VR256X:$src, sub_ymm)))), sub_xmm))>; 9352def: Pat<(v4i32 (trunc (v4i64 VR256X:$src))), 9353 (v4i32 (EXTRACT_SUBREG 9354 (v8i32 (VPMOVQDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), 9355 VR256X:$src, sub_ymm)))), sub_xmm))>; 9356} 9357 9358let Predicates = [HasBWI, NoVLX] in { 9359def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))), 9360 (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF), 9361 VR256X:$src, sub_ymm))), sub_xmm))>; 9362} 9363 9364// Without BWI we can't use vXi16/vXi8 vselect so we have to use vmtrunc nodes. 9365multiclass mtrunc_lowering<string InstrName, SDNode OpNode, 9366 X86VectorVTInfo DestInfo, 9367 X86VectorVTInfo SrcInfo> { 9368 def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src), 9369 DestInfo.RC:$src0, 9370 SrcInfo.KRCWM:$mask)), 9371 (!cast<Instruction>(InstrName#"rrk") DestInfo.RC:$src0, 9372 SrcInfo.KRCWM:$mask, 9373 SrcInfo.RC:$src)>; 9374 9375 def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src), 9376 DestInfo.ImmAllZerosV, 9377 SrcInfo.KRCWM:$mask)), 9378 (!cast<Instruction>(InstrName#"rrkz") SrcInfo.KRCWM:$mask, 9379 SrcInfo.RC:$src)>; 9380} 9381 9382let Predicates = [HasVLX] in { 9383defm : mtrunc_lowering<"VPMOVDWZ256", X86vmtrunc, v8i16x_info, v8i32x_info>; 9384defm : mtrunc_lowering<"VPMOVSDWZ256", X86vmtruncs, v8i16x_info, v8i32x_info>; 9385defm : mtrunc_lowering<"VPMOVUSDWZ256", X86vmtruncus, v8i16x_info, v8i32x_info>; 9386} 9387 9388let Predicates = [HasAVX512] in { 9389defm : mtrunc_lowering<"VPMOVDWZ", X86vmtrunc, v16i16x_info, v16i32_info>; 9390defm : mtrunc_lowering<"VPMOVSDWZ", X86vmtruncs, v16i16x_info, v16i32_info>; 9391defm : mtrunc_lowering<"VPMOVUSDWZ", X86vmtruncus, v16i16x_info, v16i32_info>; 9392 9393defm : mtrunc_lowering<"VPMOVDBZ", X86vmtrunc, v16i8x_info, v16i32_info>; 9394defm : mtrunc_lowering<"VPMOVSDBZ", X86vmtruncs, v16i8x_info, v16i32_info>; 9395defm : mtrunc_lowering<"VPMOVUSDBZ", X86vmtruncus, v16i8x_info, v16i32_info>; 9396 9397defm : mtrunc_lowering<"VPMOVQWZ", X86vmtrunc, v8i16x_info, v8i64_info>; 9398defm : mtrunc_lowering<"VPMOVSQWZ", X86vmtruncs, v8i16x_info, v8i64_info>; 9399defm : mtrunc_lowering<"VPMOVUSQWZ", X86vmtruncus, v8i16x_info, v8i64_info>; 9400} 9401 9402multiclass WriteShuffle256_common<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched, 9403 X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo, 9404 X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode>{ 9405 let ExeDomain = DestInfo.ExeDomain in { 9406 defm rr : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst), 9407 (ins SrcInfo.RC:$src), OpcodeStr ,"$src", "$src", 9408 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src)))>, 9409 EVEX, Sched<[sched]>; 9410 9411 defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst), 9412 (ins x86memop:$src), OpcodeStr ,"$src", "$src", 9413 (DestInfo.VT (LdFrag addr:$src))>, 9414 EVEX, Sched<[sched.Folded]>; 9415 } 9416} 9417 9418multiclass WriteShuffle256_BW<bits<8> opc, string OpcodeStr, 9419 SDNode OpNode, SDNode InVecNode, string ExtTy, 9420 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> { 9421 let Predicates = [HasVLX, HasBWI] in { 9422 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v8i16x_info, 9423 v16i8x_info, i64mem, LdFrag, InVecNode>, 9424 EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128, VEX_WIG; 9425 9426 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v16i16x_info, 9427 v16i8x_info, i128mem, LdFrag, OpNode>, 9428 EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256, VEX_WIG; 9429 } 9430 let Predicates = [HasBWI] in { 9431 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v32i16_info, 9432 v32i8x_info, i256mem, LdFrag, OpNode>, 9433 EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512, VEX_WIG; 9434 } 9435} 9436 9437multiclass WriteShuffle256_BD<bits<8> opc, string OpcodeStr, 9438 SDNode OpNode, SDNode InVecNode, string ExtTy, 9439 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> { 9440 let Predicates = [HasVLX, HasAVX512] in { 9441 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v4i32x_info, 9442 v16i8x_info, i32mem, LdFrag, InVecNode>, 9443 EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128, VEX_WIG; 9444 9445 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info, 9446 v16i8x_info, i64mem, LdFrag, InVecNode>, 9447 EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256, VEX_WIG; 9448 } 9449 let Predicates = [HasAVX512] in { 9450 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v16i32_info, 9451 v16i8x_info, i128mem, LdFrag, OpNode>, 9452 EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512, VEX_WIG; 9453 } 9454} 9455 9456multiclass WriteShuffle256_BQ<bits<8> opc, string OpcodeStr, 9457 SDNode OpNode, SDNode InVecNode, string ExtTy, 9458 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> { 9459 let Predicates = [HasVLX, HasAVX512] in { 9460 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info, 9461 v16i8x_info, i16mem, LdFrag, InVecNode>, 9462 EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128, VEX_WIG; 9463 9464 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info, 9465 v16i8x_info, i32mem, LdFrag, InVecNode>, 9466 EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256, VEX_WIG; 9467 } 9468 let Predicates = [HasAVX512] in { 9469 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info, 9470 v16i8x_info, i64mem, LdFrag, InVecNode>, 9471 EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512, VEX_WIG; 9472 } 9473} 9474 9475multiclass WriteShuffle256_WD<bits<8> opc, string OpcodeStr, 9476 SDNode OpNode, SDNode InVecNode, string ExtTy, 9477 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> { 9478 let Predicates = [HasVLX, HasAVX512] in { 9479 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v4i32x_info, 9480 v8i16x_info, i64mem, LdFrag, InVecNode>, 9481 EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128, VEX_WIG; 9482 9483 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info, 9484 v8i16x_info, i128mem, LdFrag, OpNode>, 9485 EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256, VEX_WIG; 9486 } 9487 let Predicates = [HasAVX512] in { 9488 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v16i32_info, 9489 v16i16x_info, i256mem, LdFrag, OpNode>, 9490 EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512, VEX_WIG; 9491 } 9492} 9493 9494multiclass WriteShuffle256_WQ<bits<8> opc, string OpcodeStr, 9495 SDNode OpNode, SDNode InVecNode, string ExtTy, 9496 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> { 9497 let Predicates = [HasVLX, HasAVX512] in { 9498 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info, 9499 v8i16x_info, i32mem, LdFrag, InVecNode>, 9500 EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128, VEX_WIG; 9501 9502 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info, 9503 v8i16x_info, i64mem, LdFrag, InVecNode>, 9504 EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256, VEX_WIG; 9505 } 9506 let Predicates = [HasAVX512] in { 9507 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info, 9508 v8i16x_info, i128mem, LdFrag, OpNode>, 9509 EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512, VEX_WIG; 9510 } 9511} 9512 9513multiclass WriteShuffle256_DQ<bits<8> opc, string OpcodeStr, 9514 SDNode OpNode, SDNode InVecNode, string ExtTy, 9515 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> { 9516 9517 let Predicates = [HasVLX, HasAVX512] in { 9518 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info, 9519 v4i32x_info, i64mem, LdFrag, InVecNode>, 9520 EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128; 9521 9522 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info, 9523 v4i32x_info, i128mem, LdFrag, OpNode>, 9524 EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256; 9525 } 9526 let Predicates = [HasAVX512] in { 9527 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info, 9528 v8i32x_info, i256mem, LdFrag, OpNode>, 9529 EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512; 9530 } 9531} 9532 9533defm VPMOVZXBW : WriteShuffle256_BW<0x30, "vpmovzxbw", zext, zext_invec, "z", WriteShuffle256>; 9534defm VPMOVZXBD : WriteShuffle256_BD<0x31, "vpmovzxbd", zext, zext_invec, "z", WriteShuffle256>; 9535defm VPMOVZXBQ : WriteShuffle256_BQ<0x32, "vpmovzxbq", zext, zext_invec, "z", WriteShuffle256>; 9536defm VPMOVZXWD : WriteShuffle256_WD<0x33, "vpmovzxwd", zext, zext_invec, "z", WriteShuffle256>; 9537defm VPMOVZXWQ : WriteShuffle256_WQ<0x34, "vpmovzxwq", zext, zext_invec, "z", WriteShuffle256>; 9538defm VPMOVZXDQ : WriteShuffle256_DQ<0x35, "vpmovzxdq", zext, zext_invec, "z", WriteShuffle256>; 9539 9540defm VPMOVSXBW: WriteShuffle256_BW<0x20, "vpmovsxbw", sext, sext_invec, "s", WriteShuffle256>; 9541defm VPMOVSXBD: WriteShuffle256_BD<0x21, "vpmovsxbd", sext, sext_invec, "s", WriteShuffle256>; 9542defm VPMOVSXBQ: WriteShuffle256_BQ<0x22, "vpmovsxbq", sext, sext_invec, "s", WriteShuffle256>; 9543defm VPMOVSXWD: WriteShuffle256_WD<0x23, "vpmovsxwd", sext, sext_invec, "s", WriteShuffle256>; 9544defm VPMOVSXWQ: WriteShuffle256_WQ<0x24, "vpmovsxwq", sext, sext_invec, "s", WriteShuffle256>; 9545defm VPMOVSXDQ: WriteShuffle256_DQ<0x25, "vpmovsxdq", sext, sext_invec, "s", WriteShuffle256>; 9546 9547 9548// Patterns that we also need any extend versions of. aext_vector_inreg 9549// is currently legalized to zext_vector_inreg. 9550multiclass AVX512_pmovx_patterns_base<string OpcPrefix, SDNode ExtOp> { 9551 // 256-bit patterns 9552 let Predicates = [HasVLX, HasBWI] in { 9553 def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))), 9554 (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>; 9555 } 9556 9557 let Predicates = [HasVLX] in { 9558 def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))), 9559 (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>; 9560 9561 def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))), 9562 (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>; 9563 } 9564 9565 // 512-bit patterns 9566 let Predicates = [HasBWI] in { 9567 def : Pat<(v32i16 (ExtOp (loadv32i8 addr:$src))), 9568 (!cast<I>(OpcPrefix#BWZrm) addr:$src)>; 9569 } 9570 let Predicates = [HasAVX512] in { 9571 def : Pat<(v16i32 (ExtOp (loadv16i8 addr:$src))), 9572 (!cast<I>(OpcPrefix#BDZrm) addr:$src)>; 9573 def : Pat<(v16i32 (ExtOp (loadv16i16 addr:$src))), 9574 (!cast<I>(OpcPrefix#WDZrm) addr:$src)>; 9575 9576 def : Pat<(v8i64 (ExtOp (loadv8i16 addr:$src))), 9577 (!cast<I>(OpcPrefix#WQZrm) addr:$src)>; 9578 9579 def : Pat<(v8i64 (ExtOp (loadv8i32 addr:$src))), 9580 (!cast<I>(OpcPrefix#DQZrm) addr:$src)>; 9581 } 9582} 9583 9584multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp, 9585 SDNode InVecOp> : 9586 AVX512_pmovx_patterns_base<OpcPrefix, ExtOp> { 9587 // 128-bit patterns 9588 let Predicates = [HasVLX, HasBWI] in { 9589 def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))), 9590 (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>; 9591 def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))), 9592 (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>; 9593 def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))), 9594 (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>; 9595 } 9596 let Predicates = [HasVLX] in { 9597 def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))), 9598 (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>; 9599 def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))), 9600 (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>; 9601 9602 def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))), 9603 (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>; 9604 9605 def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))), 9606 (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>; 9607 def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))), 9608 (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>; 9609 def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))), 9610 (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>; 9611 9612 def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))), 9613 (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>; 9614 def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (X86vzload32 addr:$src))))), 9615 (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>; 9616 9617 def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))), 9618 (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>; 9619 def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))), 9620 (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>; 9621 def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))), 9622 (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>; 9623 } 9624 let Predicates = [HasVLX] in { 9625 def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))), 9626 (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>; 9627 def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadf64 addr:$src)))))), 9628 (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>; 9629 def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))), 9630 (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>; 9631 9632 def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))), 9633 (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>; 9634 def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))), 9635 (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>; 9636 9637 def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))), 9638 (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>; 9639 def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadf64 addr:$src)))))), 9640 (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>; 9641 def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))), 9642 (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>; 9643 } 9644 // 512-bit patterns 9645 let Predicates = [HasAVX512] in { 9646 def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))), 9647 (!cast<I>(OpcPrefix#BQZrm) addr:$src)>; 9648 def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))), 9649 (!cast<I>(OpcPrefix#BQZrm) addr:$src)>; 9650 def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))), 9651 (!cast<I>(OpcPrefix#BQZrm) addr:$src)>; 9652 } 9653} 9654 9655defm : AVX512_pmovx_patterns<"VPMOVSX", sext, sext_invec>; 9656defm : AVX512_pmovx_patterns<"VPMOVZX", zext, zext_invec>; 9657 9658// Without BWI we can't do a trunc from v16i16 to v16i8. DAG combine can merge 9659// ext+trunc aggressively making it impossible to legalize the DAG to this 9660// pattern directly. 9661let Predicates = [HasAVX512, NoBWI] in { 9662def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))), 9663 (VPMOVDBZrr (v16i32 (VPMOVZXWDZrr VR256X:$src)))>; 9664def: Pat<(v16i8 (trunc (loadv16i16 addr:$src))), 9665 (VPMOVDBZrr (v16i32 (VPMOVZXWDZrm addr:$src)))>; 9666} 9667 9668//===----------------------------------------------------------------------===// 9669// GATHER - SCATTER Operations 9670 9671// FIXME: Improve scheduling of gather/scatter instructions. 9672multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, 9673 X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> { 9674 let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb", 9675 ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in 9676 def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, MaskRC:$mask_wb), 9677 (ins _.RC:$src1, MaskRC:$mask, memop:$src2), 9678 !strconcat(OpcodeStr#_.Suffix, 9679 "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"), 9680 []>, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>, 9681 Sched<[WriteLoad, WriteVecMaskedGatherWriteback]>; 9682} 9683 9684multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc, 9685 AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> { 9686 defm NAME#D#SUFF#Z: avx512_gather<dopc, OpcodeStr#"d", _.info512, 9687 vy512xmem>, EVEX_V512, VEX_W; 9688 defm NAME#Q#SUFF#Z: avx512_gather<qopc, OpcodeStr#"q", _.info512, 9689 vz512mem>, EVEX_V512, VEX_W; 9690let Predicates = [HasVLX] in { 9691 defm NAME#D#SUFF#Z256: avx512_gather<dopc, OpcodeStr#"d", _.info256, 9692 vx256xmem>, EVEX_V256, VEX_W; 9693 defm NAME#Q#SUFF#Z256: avx512_gather<qopc, OpcodeStr#"q", _.info256, 9694 vy256xmem>, EVEX_V256, VEX_W; 9695 defm NAME#D#SUFF#Z128: avx512_gather<dopc, OpcodeStr#"d", _.info128, 9696 vx128xmem>, EVEX_V128, VEX_W; 9697 defm NAME#Q#SUFF#Z128: avx512_gather<qopc, OpcodeStr#"q", _.info128, 9698 vx128xmem>, EVEX_V128, VEX_W; 9699} 9700} 9701 9702multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc, 9703 AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> { 9704 defm NAME#D#SUFF#Z: avx512_gather<dopc, OpcodeStr#"d", _.info512, vz512mem>, 9705 EVEX_V512; 9706 defm NAME#Q#SUFF#Z: avx512_gather<qopc, OpcodeStr#"q", _.info256, vz256mem>, 9707 EVEX_V512; 9708let Predicates = [HasVLX] in { 9709 defm NAME#D#SUFF#Z256: avx512_gather<dopc, OpcodeStr#"d", _.info256, 9710 vy256xmem>, EVEX_V256; 9711 defm NAME#Q#SUFF#Z256: avx512_gather<qopc, OpcodeStr#"q", _.info128, 9712 vy128xmem>, EVEX_V256; 9713 defm NAME#D#SUFF#Z128: avx512_gather<dopc, OpcodeStr#"d", _.info128, 9714 vx128xmem>, EVEX_V128; 9715 defm NAME#Q#SUFF#Z128: avx512_gather<qopc, OpcodeStr#"q", _.info128, 9716 vx64xmem, VK2WM>, EVEX_V128; 9717} 9718} 9719 9720 9721defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">, 9722 avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">; 9723 9724defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">, 9725 avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">; 9726 9727multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, 9728 X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> { 9729 9730let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain, 9731 hasSideEffects = 0 in 9732 9733 def mr : AVX5128I<opc, MRMDestMem, (outs MaskRC:$mask_wb), 9734 (ins memop:$dst, MaskRC:$mask, _.RC:$src), 9735 !strconcat(OpcodeStr#_.Suffix, 9736 "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"), 9737 []>, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>, 9738 Sched<[WriteStore]>; 9739} 9740 9741multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc, 9742 AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> { 9743 defm NAME#D#SUFF#Z: avx512_scatter<dopc, OpcodeStr#"d", _.info512, 9744 vy512xmem>, EVEX_V512, VEX_W; 9745 defm NAME#Q#SUFF#Z: avx512_scatter<qopc, OpcodeStr#"q", _.info512, 9746 vz512mem>, EVEX_V512, VEX_W; 9747let Predicates = [HasVLX] in { 9748 defm NAME#D#SUFF#Z256: avx512_scatter<dopc, OpcodeStr#"d", _.info256, 9749 vx256xmem>, EVEX_V256, VEX_W; 9750 defm NAME#Q#SUFF#Z256: avx512_scatter<qopc, OpcodeStr#"q", _.info256, 9751 vy256xmem>, EVEX_V256, VEX_W; 9752 defm NAME#D#SUFF#Z128: avx512_scatter<dopc, OpcodeStr#"d", _.info128, 9753 vx128xmem>, EVEX_V128, VEX_W; 9754 defm NAME#Q#SUFF#Z128: avx512_scatter<qopc, OpcodeStr#"q", _.info128, 9755 vx128xmem>, EVEX_V128, VEX_W; 9756} 9757} 9758 9759multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc, 9760 AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> { 9761 defm NAME#D#SUFF#Z: avx512_scatter<dopc, OpcodeStr#"d", _.info512, vz512mem>, 9762 EVEX_V512; 9763 defm NAME#Q#SUFF#Z: avx512_scatter<qopc, OpcodeStr#"q", _.info256, vz256mem>, 9764 EVEX_V512; 9765let Predicates = [HasVLX] in { 9766 defm NAME#D#SUFF#Z256: avx512_scatter<dopc, OpcodeStr#"d", _.info256, 9767 vy256xmem>, EVEX_V256; 9768 defm NAME#Q#SUFF#Z256: avx512_scatter<qopc, OpcodeStr#"q", _.info128, 9769 vy128xmem>, EVEX_V256; 9770 defm NAME#D#SUFF#Z128: avx512_scatter<dopc, OpcodeStr#"d", _.info128, 9771 vx128xmem>, EVEX_V128; 9772 defm NAME#Q#SUFF#Z128: avx512_scatter<qopc, OpcodeStr#"q", _.info128, 9773 vx64xmem, VK2WM>, EVEX_V128; 9774} 9775} 9776 9777defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">, 9778 avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">; 9779 9780defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">, 9781 avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">; 9782 9783// prefetch 9784multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr, 9785 RegisterClass KRC, X86MemOperand memop> { 9786 let Predicates = [HasPFI], mayLoad = 1, mayStore = 1 in 9787 def m : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src), 9788 !strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"), []>, 9789 EVEX, EVEX_K, Sched<[WriteLoad]>; 9790} 9791 9792defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps", 9793 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>; 9794 9795defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps", 9796 VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>; 9797 9798defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd", 9799 VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>; 9800 9801defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd", 9802 VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; 9803 9804defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps", 9805 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>; 9806 9807defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps", 9808 VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>; 9809 9810defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd", 9811 VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>; 9812 9813defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd", 9814 VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; 9815 9816defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps", 9817 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>; 9818 9819defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps", 9820 VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>; 9821 9822defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd", 9823 VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>; 9824 9825defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd", 9826 VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; 9827 9828defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps", 9829 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>; 9830 9831defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps", 9832 VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>; 9833 9834defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd", 9835 VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>; 9836 9837defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd", 9838 VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; 9839 9840multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr > { 9841def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src), 9842 !strconcat(OpcodeStr#Vec.Suffix, "\t{$src, $dst|$dst, $src}"), 9843 [(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))]>, 9844 EVEX, Sched<[WriteMove]>; // TODO - WriteVecTrunc? 9845} 9846 9847multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo, 9848 string OpcodeStr, Predicate prd> { 9849let Predicates = [prd] in 9850 defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr>, EVEX_V512; 9851 9852 let Predicates = [prd, HasVLX] in { 9853 defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr>, EVEX_V256; 9854 defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr>, EVEX_V128; 9855 } 9856} 9857 9858defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2" , HasBWI>; 9859defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI> , VEX_W; 9860defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>; 9861defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI> , VEX_W; 9862 9863multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr > { 9864 def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src), 9865 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 9866 [(set _.KRC:$dst, (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src)))]>, 9867 EVEX, Sched<[WriteMove]>; 9868} 9869 9870// Use 512bit version to implement 128/256 bit in case NoVLX. 9871multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo, 9872 X86VectorVTInfo _, 9873 string Name> { 9874 9875 def : Pat<(_.KVT (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src))), 9876 (_.KVT (COPY_TO_REGCLASS 9877 (!cast<Instruction>(Name#"Zrr") 9878 (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)), 9879 _.RC:$src, _.SubRegIdx)), 9880 _.KRC))>; 9881} 9882 9883multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr, 9884 AVX512VLVectorVTInfo VTInfo, Predicate prd> { 9885 let Predicates = [prd] in 9886 defm Z : convert_vector_to_mask_common <opc, VTInfo.info512, OpcodeStr>, 9887 EVEX_V512; 9888 9889 let Predicates = [prd, HasVLX] in { 9890 defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256, OpcodeStr>, 9891 EVEX_V256; 9892 defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>, 9893 EVEX_V128; 9894 } 9895 let Predicates = [prd, NoVLX] in { 9896 defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info256, NAME>; 9897 defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info128, NAME>; 9898 } 9899} 9900 9901defm VPMOVB2M : avx512_convert_vector_to_mask<0x29, "vpmovb2m", 9902 avx512vl_i8_info, HasBWI>; 9903defm VPMOVW2M : avx512_convert_vector_to_mask<0x29, "vpmovw2m", 9904 avx512vl_i16_info, HasBWI>, VEX_W; 9905defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m", 9906 avx512vl_i32_info, HasDQI>; 9907defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m", 9908 avx512vl_i64_info, HasDQI>, VEX_W; 9909 9910// Patterns for handling sext from a mask register to v16i8/v16i16 when DQI 9911// is available, but BWI is not. We can't handle this in lowering because 9912// a target independent DAG combine likes to combine sext and trunc. 9913let Predicates = [HasDQI, NoBWI] in { 9914 def : Pat<(v16i8 (sext (v16i1 VK16:$src))), 9915 (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>; 9916 def : Pat<(v16i16 (sext (v16i1 VK16:$src))), 9917 (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>; 9918} 9919 9920let Predicates = [HasDQI, NoBWI, HasVLX] in { 9921 def : Pat<(v8i16 (sext (v8i1 VK8:$src))), 9922 (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>; 9923} 9924 9925//===----------------------------------------------------------------------===// 9926// AVX-512 - COMPRESS and EXPAND 9927// 9928 9929multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _, 9930 string OpcodeStr, X86FoldableSchedWrite sched> { 9931 defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst), 9932 (ins _.RC:$src1), OpcodeStr, "$src1", "$src1", 9933 (null_frag)>, AVX5128IBase, 9934 Sched<[sched]>; 9935 9936 let mayStore = 1, hasSideEffects = 0 in 9937 def mr : AVX5128I<opc, MRMDestMem, (outs), 9938 (ins _.MemOp:$dst, _.RC:$src), 9939 OpcodeStr # "\t{$src, $dst|$dst, $src}", 9940 []>, EVEX_CD8<_.EltSize, CD8VT1>, 9941 Sched<[sched.Folded]>; 9942 9943 def mrk : AVX5128I<opc, MRMDestMem, (outs), 9944 (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src), 9945 OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", 9946 []>, 9947 EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>, 9948 Sched<[sched.Folded]>; 9949} 9950 9951multiclass compress_by_vec_width_lowering<X86VectorVTInfo _, string Name> { 9952 def : Pat<(X86mCompressingStore (_.VT _.RC:$src), addr:$dst, _.KRCWM:$mask), 9953 (!cast<Instruction>(Name#_.ZSuffix#mrk) 9954 addr:$dst, _.KRCWM:$mask, _.RC:$src)>; 9955 9956 def : Pat<(X86compress (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask), 9957 (!cast<Instruction>(Name#_.ZSuffix#rrk) 9958 _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>; 9959 def : Pat<(X86compress (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask), 9960 (!cast<Instruction>(Name#_.ZSuffix#rrkz) 9961 _.KRCWM:$mask, _.RC:$src)>; 9962} 9963 9964multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr, 9965 X86FoldableSchedWrite sched, 9966 AVX512VLVectorVTInfo VTInfo, 9967 Predicate Pred = HasAVX512> { 9968 let Predicates = [Pred] in 9969 defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr, sched>, 9970 compress_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512; 9971 9972 let Predicates = [Pred, HasVLX] in { 9973 defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr, sched>, 9974 compress_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256; 9975 defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr, sched>, 9976 compress_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128; 9977 } 9978} 9979 9980// FIXME: Is there a better scheduler class for VPCOMPRESS? 9981defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", WriteVarShuffle256, 9982 avx512vl_i32_info>, EVEX, NotMemoryFoldable; 9983defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", WriteVarShuffle256, 9984 avx512vl_i64_info>, EVEX, VEX_W, NotMemoryFoldable; 9985defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", WriteVarShuffle256, 9986 avx512vl_f32_info>, EVEX, NotMemoryFoldable; 9987defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", WriteVarShuffle256, 9988 avx512vl_f64_info>, EVEX, VEX_W, NotMemoryFoldable; 9989 9990// expand 9991multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _, 9992 string OpcodeStr, X86FoldableSchedWrite sched> { 9993 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 9994 (ins _.RC:$src1), OpcodeStr, "$src1", "$src1", 9995 (null_frag)>, AVX5128IBase, 9996 Sched<[sched]>; 9997 9998 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 9999 (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1", 10000 (null_frag)>, 10001 AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>, 10002 Sched<[sched.Folded, sched.ReadAfterFold]>; 10003} 10004 10005multiclass expand_by_vec_width_lowering<X86VectorVTInfo _, string Name> { 10006 10007 def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)), 10008 (!cast<Instruction>(Name#_.ZSuffix#rmkz) 10009 _.KRCWM:$mask, addr:$src)>; 10010 10011 def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, _.ImmAllZerosV)), 10012 (!cast<Instruction>(Name#_.ZSuffix#rmkz) 10013 _.KRCWM:$mask, addr:$src)>; 10014 10015 def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, 10016 (_.VT _.RC:$src0))), 10017 (!cast<Instruction>(Name#_.ZSuffix#rmk) 10018 _.RC:$src0, _.KRCWM:$mask, addr:$src)>; 10019 10020 def : Pat<(X86expand (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask), 10021 (!cast<Instruction>(Name#_.ZSuffix#rrk) 10022 _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>; 10023 def : Pat<(X86expand (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask), 10024 (!cast<Instruction>(Name#_.ZSuffix#rrkz) 10025 _.KRCWM:$mask, _.RC:$src)>; 10026} 10027 10028multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr, 10029 X86FoldableSchedWrite sched, 10030 AVX512VLVectorVTInfo VTInfo, 10031 Predicate Pred = HasAVX512> { 10032 let Predicates = [Pred] in 10033 defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr, sched>, 10034 expand_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512; 10035 10036 let Predicates = [Pred, HasVLX] in { 10037 defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr, sched>, 10038 expand_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256; 10039 defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr, sched>, 10040 expand_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128; 10041 } 10042} 10043 10044// FIXME: Is there a better scheduler class for VPEXPAND? 10045defm VPEXPANDD : expand_by_elt_width <0x89, "vpexpandd", WriteVarShuffle256, 10046 avx512vl_i32_info>, EVEX; 10047defm VPEXPANDQ : expand_by_elt_width <0x89, "vpexpandq", WriteVarShuffle256, 10048 avx512vl_i64_info>, EVEX, VEX_W; 10049defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", WriteVarShuffle256, 10050 avx512vl_f32_info>, EVEX; 10051defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", WriteVarShuffle256, 10052 avx512vl_f64_info>, EVEX, VEX_W; 10053 10054//handle instruction reg_vec1 = op(reg_vec,imm) 10055// op(mem_vec,imm) 10056// op(broadcast(eltVt),imm) 10057//all instruction created with FROUND_CURRENT 10058multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr, 10059 SDPatternOperator OpNode, 10060 SDPatternOperator MaskOpNode, 10061 X86FoldableSchedWrite sched, 10062 X86VectorVTInfo _> { 10063 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in { 10064 defm rri : AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst), 10065 (ins _.RC:$src1, i32u8imm:$src2), 10066 OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2", 10067 (OpNode (_.VT _.RC:$src1), (i32 timm:$src2)), 10068 (MaskOpNode (_.VT _.RC:$src1), (i32 timm:$src2))>, 10069 Sched<[sched]>; 10070 defm rmi : AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst), 10071 (ins _.MemOp:$src1, i32u8imm:$src2), 10072 OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2", 10073 (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))), 10074 (i32 timm:$src2)), 10075 (MaskOpNode (_.VT (bitconvert (_.LdFrag addr:$src1))), 10076 (i32 timm:$src2))>, 10077 Sched<[sched.Folded, sched.ReadAfterFold]>; 10078 defm rmbi : AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst), 10079 (ins _.ScalarMemOp:$src1, i32u8imm:$src2), 10080 OpcodeStr#_.Suffix, "$src2, ${src1}"#_.BroadcastStr, 10081 "${src1}"#_.BroadcastStr#", $src2", 10082 (OpNode (_.VT (_.BroadcastLdFrag addr:$src1)), 10083 (i32 timm:$src2)), 10084 (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src1)), 10085 (i32 timm:$src2))>, EVEX_B, 10086 Sched<[sched.Folded, sched.ReadAfterFold]>; 10087 } 10088} 10089 10090//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae} 10091multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr, 10092 SDNode OpNode, X86FoldableSchedWrite sched, 10093 X86VectorVTInfo _> { 10094 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in 10095 defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 10096 (ins _.RC:$src1, i32u8imm:$src2), 10097 OpcodeStr#_.Suffix, "$src2, {sae}, $src1", 10098 "$src1, {sae}, $src2", 10099 (OpNode (_.VT _.RC:$src1), 10100 (i32 timm:$src2))>, 10101 EVEX_B, Sched<[sched]>; 10102} 10103 10104multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr, 10105 AVX512VLVectorVTInfo _, bits<8> opc, SDPatternOperator OpNode, 10106 SDPatternOperator MaskOpNode, SDNode OpNodeSAE, X86SchedWriteWidths sched, 10107 Predicate prd>{ 10108 let Predicates = [prd] in { 10109 defm Z : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode, 10110 sched.ZMM, _.info512>, 10111 avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE, 10112 sched.ZMM, _.info512>, EVEX_V512; 10113 } 10114 let Predicates = [prd, HasVLX] in { 10115 defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode, 10116 sched.XMM, _.info128>, EVEX_V128; 10117 defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode, 10118 sched.YMM, _.info256>, EVEX_V256; 10119 } 10120} 10121 10122//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm) 10123// op(reg_vec2,mem_vec,imm) 10124// op(reg_vec2,broadcast(eltVt),imm) 10125//all instruction created with FROUND_CURRENT 10126multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode, 10127 X86FoldableSchedWrite sched, X86VectorVTInfo _>{ 10128 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in { 10129 defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 10130 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), 10131 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", 10132 (OpNode (_.VT _.RC:$src1), 10133 (_.VT _.RC:$src2), 10134 (i32 timm:$src3))>, 10135 Sched<[sched]>; 10136 defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 10137 (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3), 10138 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", 10139 (OpNode (_.VT _.RC:$src1), 10140 (_.VT (bitconvert (_.LdFrag addr:$src2))), 10141 (i32 timm:$src3))>, 10142 Sched<[sched.Folded, sched.ReadAfterFold]>; 10143 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 10144 (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3), 10145 OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1", 10146 "$src1, ${src2}"#_.BroadcastStr#", $src3", 10147 (OpNode (_.VT _.RC:$src1), 10148 (_.VT (_.BroadcastLdFrag addr:$src2)), 10149 (i32 timm:$src3))>, EVEX_B, 10150 Sched<[sched.Folded, sched.ReadAfterFold]>; 10151 } 10152} 10153 10154//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm) 10155// op(reg_vec2,mem_vec,imm) 10156multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode, 10157 X86FoldableSchedWrite sched, X86VectorVTInfo DestInfo, 10158 X86VectorVTInfo SrcInfo>{ 10159 let ExeDomain = DestInfo.ExeDomain in { 10160 defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst), 10161 (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3), 10162 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", 10163 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1), 10164 (SrcInfo.VT SrcInfo.RC:$src2), 10165 (i8 timm:$src3)))>, 10166 Sched<[sched]>; 10167 defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst), 10168 (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3), 10169 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", 10170 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1), 10171 (SrcInfo.VT (bitconvert 10172 (SrcInfo.LdFrag addr:$src2))), 10173 (i8 timm:$src3)))>, 10174 Sched<[sched.Folded, sched.ReadAfterFold]>; 10175 } 10176} 10177 10178//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm) 10179// op(reg_vec2,mem_vec,imm) 10180// op(reg_vec2,broadcast(eltVt),imm) 10181multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode, 10182 X86FoldableSchedWrite sched, X86VectorVTInfo _>: 10183 avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, sched, _, _>{ 10184 10185 let ExeDomain = _.ExeDomain in 10186 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 10187 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3), 10188 OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1", 10189 "$src1, ${src2}"#_.BroadcastStr#", $src3", 10190 (OpNode (_.VT _.RC:$src1), 10191 (_.VT (_.BroadcastLdFrag addr:$src2)), 10192 (i8 timm:$src3))>, EVEX_B, 10193 Sched<[sched.Folded, sched.ReadAfterFold]>; 10194} 10195 10196//handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm) 10197// op(reg_vec2,mem_scalar,imm) 10198multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode, 10199 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 10200 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in { 10201 defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 10202 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), 10203 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", 10204 (OpNode (_.VT _.RC:$src1), 10205 (_.VT _.RC:$src2), 10206 (i32 timm:$src3))>, 10207 Sched<[sched]>; 10208 defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), 10209 (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3), 10210 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", 10211 (OpNode (_.VT _.RC:$src1), 10212 (_.ScalarIntMemFrags addr:$src2), 10213 (i32 timm:$src3))>, 10214 Sched<[sched.Folded, sched.ReadAfterFold]>; 10215 } 10216} 10217 10218//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae} 10219multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr, 10220 SDNode OpNode, X86FoldableSchedWrite sched, 10221 X86VectorVTInfo _> { 10222 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in 10223 defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 10224 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), 10225 OpcodeStr, "$src3, {sae}, $src2, $src1", 10226 "$src1, $src2, {sae}, $src3", 10227 (OpNode (_.VT _.RC:$src1), 10228 (_.VT _.RC:$src2), 10229 (i32 timm:$src3))>, 10230 EVEX_B, Sched<[sched]>; 10231} 10232 10233//handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae} 10234multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode, 10235 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 10236 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in 10237 defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 10238 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), 10239 OpcodeStr, "$src3, {sae}, $src2, $src1", 10240 "$src1, $src2, {sae}, $src3", 10241 (OpNode (_.VT _.RC:$src1), 10242 (_.VT _.RC:$src2), 10243 (i32 timm:$src3))>, 10244 EVEX_B, Sched<[sched]>; 10245} 10246 10247multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr, 10248 AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode, 10249 SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{ 10250 let Predicates = [prd] in { 10251 defm Z : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, 10252 avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE, sched.ZMM, _.info512>, 10253 EVEX_V512; 10254 10255 } 10256 let Predicates = [prd, HasVLX] in { 10257 defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, 10258 EVEX_V128; 10259 defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, 10260 EVEX_V256; 10261 } 10262} 10263 10264multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr, 10265 X86SchedWriteWidths sched, AVX512VLVectorVTInfo DestInfo, 10266 AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> { 10267 let Predicates = [Pred] in { 10268 defm Z : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.ZMM, DestInfo.info512, 10269 SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX_4V; 10270 } 10271 let Predicates = [Pred, HasVLX] in { 10272 defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.XMM, DestInfo.info128, 10273 SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX_4V; 10274 defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.YMM, DestInfo.info256, 10275 SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX_4V; 10276 } 10277} 10278 10279multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _, 10280 bits<8> opc, SDNode OpNode, X86SchedWriteWidths sched, 10281 Predicate Pred = HasAVX512> { 10282 let Predicates = [Pred] in { 10283 defm Z : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, 10284 EVEX_V512; 10285 } 10286 let Predicates = [Pred, HasVLX] in { 10287 defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, 10288 EVEX_V128; 10289 defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, 10290 EVEX_V256; 10291 } 10292} 10293 10294multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr, 10295 X86VectorVTInfo _, bits<8> opc, SDNode OpNode, 10296 SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd> { 10297 let Predicates = [prd] in { 10298 defm Z : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, sched.XMM, _>, 10299 avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeSAE, sched.XMM, _>; 10300 } 10301} 10302 10303multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr, 10304 bits<8> opcPs, bits<8> opcPd, SDPatternOperator OpNode, 10305 SDPatternOperator MaskOpNode, SDNode OpNodeSAE, 10306 X86SchedWriteWidths sched, Predicate prd>{ 10307 defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info, 10308 opcPs, OpNode, MaskOpNode, OpNodeSAE, sched, prd>, 10309 EVEX_CD8<32, CD8VF>; 10310 defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info, 10311 opcPd, OpNode, MaskOpNode, OpNodeSAE, sched, prd>, 10312 EVEX_CD8<64, CD8VF>, VEX_W; 10313} 10314 10315defm VREDUCE : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56, 10316 X86VReduce, X86VReduce, X86VReduceSAE, 10317 SchedWriteFRnd, HasDQI>, AVX512AIi8Base, EVEX; 10318defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09, 10319 X86any_VRndScale, X86VRndScale, X86VRndScaleSAE, 10320 SchedWriteFRnd, HasAVX512>, 10321 AVX512AIi8Base, EVEX; 10322defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26, 10323 X86VGetMant, X86VGetMant, X86VGetMantSAE, 10324 SchedWriteFRnd, HasAVX512>, AVX512AIi8Base, EVEX; 10325 10326defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info, 10327 0x50, X86VRange, X86VRangeSAE, 10328 SchedWriteFAdd, HasDQI>, 10329 AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; 10330defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info, 10331 0x50, X86VRange, X86VRangeSAE, 10332 SchedWriteFAdd, HasDQI>, 10333 AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; 10334 10335defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd", 10336 f64x_info, 0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>, 10337 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W; 10338defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info, 10339 0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>, 10340 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>; 10341 10342defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info, 10343 0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>, 10344 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W; 10345defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info, 10346 0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>, 10347 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>; 10348 10349defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info, 10350 0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>, 10351 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W; 10352defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info, 10353 0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>, 10354 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>; 10355 10356multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr, 10357 X86FoldableSchedWrite sched, 10358 X86VectorVTInfo _, 10359 X86VectorVTInfo CastInfo, 10360 string EVEX2VEXOvrd> { 10361 let ExeDomain = _.ExeDomain in { 10362 defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 10363 (ins _.RC:$src1, _.RC:$src2, u8imm:$src3), 10364 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", 10365 (_.VT (bitconvert 10366 (CastInfo.VT (X86Shuf128 _.RC:$src1, _.RC:$src2, 10367 (i8 timm:$src3)))))>, 10368 Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr">; 10369 defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 10370 (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3), 10371 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", 10372 (_.VT 10373 (bitconvert 10374 (CastInfo.VT (X86Shuf128 _.RC:$src1, 10375 (CastInfo.LdFrag addr:$src2), 10376 (i8 timm:$src3)))))>, 10377 Sched<[sched.Folded, sched.ReadAfterFold]>, 10378 EVEX2VEXOverride<EVEX2VEXOvrd#"rm">; 10379 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 10380 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3), 10381 OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1", 10382 "$src1, ${src2}"#_.BroadcastStr#", $src3", 10383 (_.VT 10384 (bitconvert 10385 (CastInfo.VT 10386 (X86Shuf128 _.RC:$src1, 10387 (_.BroadcastLdFrag addr:$src2), 10388 (i8 timm:$src3)))))>, EVEX_B, 10389 Sched<[sched.Folded, sched.ReadAfterFold]>; 10390 } 10391} 10392 10393multiclass avx512_shuff_packed_128<string OpcodeStr, X86FoldableSchedWrite sched, 10394 AVX512VLVectorVTInfo _, 10395 AVX512VLVectorVTInfo CastInfo, bits<8> opc, 10396 string EVEX2VEXOvrd>{ 10397 let Predicates = [HasAVX512] in 10398 defm Z : avx512_shuff_packed_128_common<opc, OpcodeStr, sched, 10399 _.info512, CastInfo.info512, "">, EVEX_V512; 10400 10401 let Predicates = [HasAVX512, HasVLX] in 10402 defm Z256 : avx512_shuff_packed_128_common<opc, OpcodeStr, sched, 10403 _.info256, CastInfo.info256, 10404 EVEX2VEXOvrd>, EVEX_V256; 10405} 10406 10407defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", WriteFShuffle256, 10408 avx512vl_f32_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; 10409defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", WriteFShuffle256, 10410 avx512vl_f64_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; 10411defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", WriteFShuffle256, 10412 avx512vl_i32_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; 10413defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", WriteFShuffle256, 10414 avx512vl_i64_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; 10415 10416multiclass avx512_valign<bits<8> opc, string OpcodeStr, 10417 X86FoldableSchedWrite sched, X86VectorVTInfo _>{ 10418 // NOTE: EVEX2VEXOverride changed back to Unset for 256-bit at the 10419 // instantiation of this class. 10420 let ExeDomain = _.ExeDomain in { 10421 defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 10422 (ins _.RC:$src1, _.RC:$src2, u8imm:$src3), 10423 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", 10424 (_.VT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 timm:$src3)))>, 10425 Sched<[sched]>, EVEX2VEXOverride<"VPALIGNRrri">; 10426 defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 10427 (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3), 10428 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", 10429 (_.VT (X86VAlign _.RC:$src1, 10430 (bitconvert (_.LdFrag addr:$src2)), 10431 (i8 timm:$src3)))>, 10432 Sched<[sched.Folded, sched.ReadAfterFold]>, 10433 EVEX2VEXOverride<"VPALIGNRrmi">; 10434 10435 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 10436 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3), 10437 OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1", 10438 "$src1, ${src2}"#_.BroadcastStr#", $src3", 10439 (X86VAlign _.RC:$src1, 10440 (_.VT (_.BroadcastLdFrag addr:$src2)), 10441 (i8 timm:$src3))>, EVEX_B, 10442 Sched<[sched.Folded, sched.ReadAfterFold]>; 10443 } 10444} 10445 10446multiclass avx512_valign_common<string OpcodeStr, X86SchedWriteWidths sched, 10447 AVX512VLVectorVTInfo _> { 10448 let Predicates = [HasAVX512] in { 10449 defm Z : avx512_valign<0x03, OpcodeStr, sched.ZMM, _.info512>, 10450 AVX512AIi8Base, EVEX_4V, EVEX_V512; 10451 } 10452 let Predicates = [HasAVX512, HasVLX] in { 10453 defm Z128 : avx512_valign<0x03, OpcodeStr, sched.XMM, _.info128>, 10454 AVX512AIi8Base, EVEX_4V, EVEX_V128; 10455 // We can't really override the 256-bit version so change it back to unset. 10456 let EVEX2VEXOverride = ? in 10457 defm Z256 : avx512_valign<0x03, OpcodeStr, sched.YMM, _.info256>, 10458 AVX512AIi8Base, EVEX_4V, EVEX_V256; 10459 } 10460} 10461 10462defm VALIGND: avx512_valign_common<"valignd", SchedWriteShuffle, 10463 avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; 10464defm VALIGNQ: avx512_valign_common<"valignq", SchedWriteShuffle, 10465 avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, 10466 VEX_W; 10467 10468defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr", 10469 SchedWriteShuffle, avx512vl_i8_info, 10470 avx512vl_i8_info>, EVEX_CD8<8, CD8VF>; 10471 10472// Fragments to help convert valignq into masked valignd. Or valignq/valignd 10473// into vpalignr. 10474def ValignqImm32XForm : SDNodeXForm<timm, [{ 10475 return getI8Imm(N->getZExtValue() * 2, SDLoc(N)); 10476}]>; 10477def ValignqImm8XForm : SDNodeXForm<timm, [{ 10478 return getI8Imm(N->getZExtValue() * 8, SDLoc(N)); 10479}]>; 10480def ValigndImm8XForm : SDNodeXForm<timm, [{ 10481 return getI8Imm(N->getZExtValue() * 4, SDLoc(N)); 10482}]>; 10483 10484multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode, 10485 X86VectorVTInfo From, X86VectorVTInfo To, 10486 SDNodeXForm ImmXForm> { 10487 def : Pat<(To.VT (vselect_mask To.KRCWM:$mask, 10488 (bitconvert 10489 (From.VT (OpNode From.RC:$src1, From.RC:$src2, 10490 timm:$src3))), 10491 To.RC:$src0)), 10492 (!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask, 10493 To.RC:$src1, To.RC:$src2, 10494 (ImmXForm timm:$src3))>; 10495 10496 def : Pat<(To.VT (vselect_mask To.KRCWM:$mask, 10497 (bitconvert 10498 (From.VT (OpNode From.RC:$src1, From.RC:$src2, 10499 timm:$src3))), 10500 To.ImmAllZerosV)), 10501 (!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask, 10502 To.RC:$src1, To.RC:$src2, 10503 (ImmXForm timm:$src3))>; 10504 10505 def : Pat<(To.VT (vselect_mask To.KRCWM:$mask, 10506 (bitconvert 10507 (From.VT (OpNode From.RC:$src1, 10508 (From.LdFrag addr:$src2), 10509 timm:$src3))), 10510 To.RC:$src0)), 10511 (!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask, 10512 To.RC:$src1, addr:$src2, 10513 (ImmXForm timm:$src3))>; 10514 10515 def : Pat<(To.VT (vselect_mask To.KRCWM:$mask, 10516 (bitconvert 10517 (From.VT (OpNode From.RC:$src1, 10518 (From.LdFrag addr:$src2), 10519 timm:$src3))), 10520 To.ImmAllZerosV)), 10521 (!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask, 10522 To.RC:$src1, addr:$src2, 10523 (ImmXForm timm:$src3))>; 10524} 10525 10526multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode, 10527 X86VectorVTInfo From, 10528 X86VectorVTInfo To, 10529 SDNodeXForm ImmXForm> : 10530 avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> { 10531 def : Pat<(From.VT (OpNode From.RC:$src1, 10532 (bitconvert (To.VT (To.BroadcastLdFrag addr:$src2))), 10533 timm:$src3)), 10534 (!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2, 10535 (ImmXForm timm:$src3))>; 10536 10537 def : Pat<(To.VT (vselect_mask To.KRCWM:$mask, 10538 (bitconvert 10539 (From.VT (OpNode From.RC:$src1, 10540 (bitconvert 10541 (To.VT (To.BroadcastLdFrag addr:$src2))), 10542 timm:$src3))), 10543 To.RC:$src0)), 10544 (!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask, 10545 To.RC:$src1, addr:$src2, 10546 (ImmXForm timm:$src3))>; 10547 10548 def : Pat<(To.VT (vselect_mask To.KRCWM:$mask, 10549 (bitconvert 10550 (From.VT (OpNode From.RC:$src1, 10551 (bitconvert 10552 (To.VT (To.BroadcastLdFrag addr:$src2))), 10553 timm:$src3))), 10554 To.ImmAllZerosV)), 10555 (!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask, 10556 To.RC:$src1, addr:$src2, 10557 (ImmXForm timm:$src3))>; 10558} 10559 10560let Predicates = [HasAVX512] in { 10561 // For 512-bit we lower to the widest element type we can. So we only need 10562 // to handle converting valignq to valignd. 10563 defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ", X86VAlign, v8i64_info, 10564 v16i32_info, ValignqImm32XForm>; 10565} 10566 10567let Predicates = [HasVLX] in { 10568 // For 128-bit we lower to the widest element type we can. So we only need 10569 // to handle converting valignq to valignd. 10570 defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ128", X86VAlign, v2i64x_info, 10571 v4i32x_info, ValignqImm32XForm>; 10572 // For 256-bit we lower to the widest element type we can. So we only need 10573 // to handle converting valignq to valignd. 10574 defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ256", X86VAlign, v4i64x_info, 10575 v8i32x_info, ValignqImm32XForm>; 10576} 10577 10578let Predicates = [HasVLX, HasBWI] in { 10579 // We can turn 128 and 256 bit VALIGND/VALIGNQ into VPALIGNR. 10580 defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v2i64x_info, 10581 v16i8x_info, ValignqImm8XForm>; 10582 defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v4i32x_info, 10583 v16i8x_info, ValigndImm8XForm>; 10584} 10585 10586defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw", 10587 SchedWritePSADBW, avx512vl_i16_info, avx512vl_i8_info>, 10588 EVEX_CD8<8, CD8VF>, NotEVEX2VEXConvertible; 10589 10590multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, 10591 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 10592 let ExeDomain = _.ExeDomain in { 10593 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 10594 (ins _.RC:$src1), OpcodeStr, 10595 "$src1", "$src1", 10596 (_.VT (OpNode (_.VT _.RC:$src1)))>, EVEX, AVX5128IBase, 10597 Sched<[sched]>; 10598 10599 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 10600 (ins _.MemOp:$src1), OpcodeStr, 10601 "$src1", "$src1", 10602 (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1)))))>, 10603 EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>, 10604 Sched<[sched.Folded]>; 10605 } 10606} 10607 10608multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode, 10609 X86FoldableSchedWrite sched, X86VectorVTInfo _> : 10610 avx512_unary_rm<opc, OpcodeStr, OpNode, sched, _> { 10611 defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 10612 (ins _.ScalarMemOp:$src1), OpcodeStr, 10613 "${src1}"#_.BroadcastStr, 10614 "${src1}"#_.BroadcastStr, 10615 (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src1))))>, 10616 EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, 10617 Sched<[sched.Folded]>; 10618} 10619 10620multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode, 10621 X86SchedWriteWidths sched, 10622 AVX512VLVectorVTInfo VTInfo, Predicate prd> { 10623 let Predicates = [prd] in 10624 defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>, 10625 EVEX_V512; 10626 10627 let Predicates = [prd, HasVLX] in { 10628 defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>, 10629 EVEX_V256; 10630 defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>, 10631 EVEX_V128; 10632 } 10633} 10634 10635multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode, 10636 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo, 10637 Predicate prd> { 10638 let Predicates = [prd] in 10639 defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>, 10640 EVEX_V512; 10641 10642 let Predicates = [prd, HasVLX] in { 10643 defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>, 10644 EVEX_V256; 10645 defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>, 10646 EVEX_V128; 10647 } 10648} 10649 10650multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr, 10651 SDNode OpNode, X86SchedWriteWidths sched, 10652 Predicate prd> { 10653 defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, sched, 10654 avx512vl_i64_info, prd>, VEX_W; 10655 defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, sched, 10656 avx512vl_i32_info, prd>; 10657} 10658 10659multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr, 10660 SDNode OpNode, X86SchedWriteWidths sched, 10661 Predicate prd> { 10662 defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, sched, 10663 avx512vl_i16_info, prd>, VEX_WIG; 10664 defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, sched, 10665 avx512vl_i8_info, prd>, VEX_WIG; 10666} 10667 10668multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w, 10669 bits<8> opc_d, bits<8> opc_q, 10670 string OpcodeStr, SDNode OpNode, 10671 X86SchedWriteWidths sched> { 10672 defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, sched, 10673 HasAVX512>, 10674 avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, sched, 10675 HasBWI>; 10676} 10677 10678defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs, 10679 SchedWriteVecALU>; 10680 10681// VPABS: Use 512bit version to implement 128/256 bit in case NoVLX. 10682let Predicates = [HasAVX512, NoVLX] in { 10683 def : Pat<(v4i64 (abs VR256X:$src)), 10684 (EXTRACT_SUBREG 10685 (VPABSQZrr 10686 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)), 10687 sub_ymm)>; 10688 def : Pat<(v2i64 (abs VR128X:$src)), 10689 (EXTRACT_SUBREG 10690 (VPABSQZrr 10691 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)), 10692 sub_xmm)>; 10693} 10694 10695// Use 512bit version to implement 128/256 bit. 10696multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode, 10697 AVX512VLVectorVTInfo _, Predicate prd> { 10698 let Predicates = [prd, NoVLX] in { 10699 def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1))), 10700 (EXTRACT_SUBREG 10701 (!cast<Instruction>(InstrStr # "Zrr") 10702 (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)), 10703 _.info256.RC:$src1, 10704 _.info256.SubRegIdx)), 10705 _.info256.SubRegIdx)>; 10706 10707 def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1))), 10708 (EXTRACT_SUBREG 10709 (!cast<Instruction>(InstrStr # "Zrr") 10710 (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)), 10711 _.info128.RC:$src1, 10712 _.info128.SubRegIdx)), 10713 _.info128.SubRegIdx)>; 10714 } 10715} 10716 10717defm VPLZCNT : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz, 10718 SchedWriteVecIMul, HasCDI>; 10719 10720// FIXME: Is there a better scheduler class for VPCONFLICT? 10721defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict, 10722 SchedWriteVecALU, HasCDI>; 10723 10724// VPLZCNT: Use 512bit version to implement 128/256 bit in case NoVLX. 10725defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>; 10726defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>; 10727 10728//===---------------------------------------------------------------------===// 10729// Counts number of ones - VPOPCNTD and VPOPCNTQ 10730//===---------------------------------------------------------------------===// 10731 10732// FIXME: Is there a better scheduler class for VPOPCNTD/VPOPCNTQ? 10733defm VPOPCNT : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop, 10734 SchedWriteVecALU, HasVPOPCNTDQ>; 10735 10736defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info, HasVPOPCNTDQ>; 10737defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info, HasVPOPCNTDQ>; 10738 10739//===---------------------------------------------------------------------===// 10740// Replicate Single FP - MOVSHDUP and MOVSLDUP 10741//===---------------------------------------------------------------------===// 10742 10743multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode, 10744 X86SchedWriteWidths sched> { 10745 defm NAME: avx512_unary_rm_vl<opc, OpcodeStr, OpNode, sched, 10746 avx512vl_f32_info, HasAVX512>, XS; 10747} 10748 10749defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup, 10750 SchedWriteFShuffle>; 10751defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup, 10752 SchedWriteFShuffle>; 10753 10754//===----------------------------------------------------------------------===// 10755// AVX-512 - MOVDDUP 10756//===----------------------------------------------------------------------===// 10757 10758multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr, 10759 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 10760 let ExeDomain = _.ExeDomain in { 10761 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 10762 (ins _.RC:$src), OpcodeStr, "$src", "$src", 10763 (_.VT (X86VBroadcast (_.VT _.RC:$src)))>, EVEX, 10764 Sched<[sched]>; 10765 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 10766 (ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src", 10767 (_.VT (_.BroadcastLdFrag addr:$src))>, 10768 EVEX, EVEX_CD8<_.EltSize, CD8VH>, 10769 Sched<[sched.Folded]>; 10770 } 10771} 10772 10773multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr, SDNode OpNode, 10774 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo> { 10775 defm Z : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.ZMM, 10776 VTInfo.info512>, EVEX_V512; 10777 10778 let Predicates = [HasAVX512, HasVLX] in { 10779 defm Z256 : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.YMM, 10780 VTInfo.info256>, EVEX_V256; 10781 defm Z128 : avx512_movddup_128<opc, OpcodeStr, sched.XMM, 10782 VTInfo.info128>, EVEX_V128; 10783 } 10784} 10785 10786multiclass avx512_movddup<bits<8> opc, string OpcodeStr, SDNode OpNode, 10787 X86SchedWriteWidths sched> { 10788 defm NAME: avx512_movddup_common<opc, OpcodeStr, OpNode, sched, 10789 avx512vl_f64_info>, XD, VEX_W; 10790} 10791 10792defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup, SchedWriteFShuffle>; 10793 10794let Predicates = [HasVLX] in { 10795def : Pat<(v2f64 (X86VBroadcast f64:$src)), 10796 (VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>; 10797 10798def : Pat<(vselect_mask (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)), 10799 (v2f64 VR128X:$src0)), 10800 (VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask, 10801 (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>; 10802def : Pat<(vselect_mask (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)), 10803 immAllZerosV), 10804 (VMOVDDUPZ128rrkz VK2WM:$mask, (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>; 10805} 10806 10807//===----------------------------------------------------------------------===// 10808// AVX-512 - Unpack Instructions 10809//===----------------------------------------------------------------------===// 10810 10811let Uses = []<Register>, mayRaiseFPException = 0 in { 10812defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, X86Unpckh, HasAVX512, 10813 SchedWriteFShuffleSizes, 0, 1>; 10814defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, X86Unpckl, HasAVX512, 10815 SchedWriteFShuffleSizes>; 10816} 10817 10818defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl, 10819 SchedWriteShuffle, HasBWI>; 10820defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh, 10821 SchedWriteShuffle, HasBWI>; 10822defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl, 10823 SchedWriteShuffle, HasBWI>; 10824defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh, 10825 SchedWriteShuffle, HasBWI>; 10826 10827defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl, 10828 SchedWriteShuffle, HasAVX512>; 10829defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh, 10830 SchedWriteShuffle, HasAVX512>; 10831defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl, 10832 SchedWriteShuffle, HasAVX512>; 10833defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh, 10834 SchedWriteShuffle, HasAVX512>; 10835 10836//===----------------------------------------------------------------------===// 10837// AVX-512 - Extract & Insert Integer Instructions 10838//===----------------------------------------------------------------------===// 10839 10840multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode, 10841 X86VectorVTInfo _> { 10842 def mr : AVX512Ii8<opc, MRMDestMem, (outs), 10843 (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2), 10844 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", 10845 [(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), timm:$src2))), 10846 addr:$dst)]>, 10847 EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecExtractSt]>; 10848} 10849 10850multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> { 10851 let Predicates = [HasBWI] in { 10852 def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst), 10853 (ins _.RC:$src1, u8imm:$src2), 10854 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", 10855 [(set GR32orGR64:$dst, 10856 (X86pextrb (_.VT _.RC:$src1), timm:$src2))]>, 10857 EVEX, TAPD, Sched<[WriteVecExtract]>; 10858 10859 defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD; 10860 } 10861} 10862 10863multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> { 10864 let Predicates = [HasBWI] in { 10865 def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst), 10866 (ins _.RC:$src1, u8imm:$src2), 10867 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", 10868 [(set GR32orGR64:$dst, 10869 (X86pextrw (_.VT _.RC:$src1), timm:$src2))]>, 10870 EVEX, PD, Sched<[WriteVecExtract]>; 10871 10872 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in 10873 def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst), 10874 (ins _.RC:$src1, u8imm:$src2), 10875 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, 10876 EVEX, TAPD, FoldGenData<NAME#rr>, 10877 Sched<[WriteVecExtract]>; 10878 10879 defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD; 10880 } 10881} 10882 10883multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _, 10884 RegisterClass GRC> { 10885 let Predicates = [HasDQI] in { 10886 def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst), 10887 (ins _.RC:$src1, u8imm:$src2), 10888 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", 10889 [(set GRC:$dst, 10890 (extractelt (_.VT _.RC:$src1), imm:$src2))]>, 10891 EVEX, TAPD, Sched<[WriteVecExtract]>; 10892 10893 def mr : AVX512Ii8<0x16, MRMDestMem, (outs), 10894 (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2), 10895 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", 10896 [(store (extractelt (_.VT _.RC:$src1), 10897 imm:$src2),addr:$dst)]>, 10898 EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD, 10899 Sched<[WriteVecExtractSt]>; 10900 } 10901} 10902 10903defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>, VEX_WIG; 10904defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>, VEX_WIG; 10905defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>; 10906defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, VEX_W; 10907 10908multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode, 10909 X86VectorVTInfo _, PatFrag LdFrag, 10910 SDPatternOperator immoperator> { 10911 def rm : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst), 10912 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3), 10913 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 10914 [(set _.RC:$dst, 10915 (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), immoperator:$src3)))]>, 10916 EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>; 10917} 10918 10919multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode, 10920 X86VectorVTInfo _, PatFrag LdFrag> { 10921 let Predicates = [HasBWI] in { 10922 def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst), 10923 (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3), 10924 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 10925 [(set _.RC:$dst, 10926 (OpNode _.RC:$src1, GR32orGR64:$src2, timm:$src3))]>, EVEX_4V, 10927 Sched<[WriteVecInsert]>; 10928 10929 defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag, timm>; 10930 } 10931} 10932 10933multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr, 10934 X86VectorVTInfo _, RegisterClass GRC> { 10935 let Predicates = [HasDQI] in { 10936 def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst), 10937 (ins _.RC:$src1, GRC:$src2, u8imm:$src3), 10938 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 10939 [(set _.RC:$dst, 10940 (_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>, 10941 EVEX_4V, TAPD, Sched<[WriteVecInsert]>; 10942 10943 defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _, 10944 _.ScalarLdFrag, imm>, TAPD; 10945 } 10946} 10947 10948defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info, 10949 extloadi8>, TAPD, VEX_WIG; 10950defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info, 10951 extloadi16>, PD, VEX_WIG; 10952defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>; 10953defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, VEX_W; 10954 10955//===----------------------------------------------------------------------===// 10956// VSHUFPS - VSHUFPD Operations 10957//===----------------------------------------------------------------------===// 10958 10959multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_I, 10960 AVX512VLVectorVTInfo VTInfo_FP>{ 10961 defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp, 10962 SchedWriteFShuffle>, 10963 EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>, 10964 AVX512AIi8Base, EVEX_4V; 10965} 10966 10967defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_i32_info, avx512vl_f32_info>, PS; 10968defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_i64_info, avx512vl_f64_info>, PD, VEX_W; 10969 10970//===----------------------------------------------------------------------===// 10971// AVX-512 - Byte shift Left/Right 10972//===----------------------------------------------------------------------===// 10973 10974multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr, 10975 Format MRMm, string OpcodeStr, 10976 X86FoldableSchedWrite sched, X86VectorVTInfo _>{ 10977 def ri : AVX512<opc, MRMr, 10978 (outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2), 10979 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 10980 [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 timm:$src2))))]>, 10981 Sched<[sched]>; 10982 def mi : AVX512<opc, MRMm, 10983 (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2), 10984 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 10985 [(set _.RC:$dst,(_.VT (OpNode 10986 (_.VT (bitconvert (_.LdFrag addr:$src1))), 10987 (i8 timm:$src2))))]>, 10988 Sched<[sched.Folded, sched.ReadAfterFold]>; 10989} 10990 10991multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr, 10992 Format MRMm, string OpcodeStr, 10993 X86SchedWriteWidths sched, Predicate prd>{ 10994 let Predicates = [prd] in 10995 defm Z : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr, 10996 sched.ZMM, v64i8_info>, EVEX_V512; 10997 let Predicates = [prd, HasVLX] in { 10998 defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr, 10999 sched.YMM, v32i8x_info>, EVEX_V256; 11000 defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr, 11001 sched.XMM, v16i8x_info>, EVEX_V128; 11002 } 11003} 11004defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq", 11005 SchedWriteShuffle, HasBWI>, 11006 AVX512PDIi8Base, EVEX_4V, VEX_WIG; 11007defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq", 11008 SchedWriteShuffle, HasBWI>, 11009 AVX512PDIi8Base, EVEX_4V, VEX_WIG; 11010 11011multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode, 11012 string OpcodeStr, X86FoldableSchedWrite sched, 11013 X86VectorVTInfo _dst, X86VectorVTInfo _src> { 11014 let isCommutable = 1 in 11015 def rr : AVX512BI<opc, MRMSrcReg, 11016 (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.RC:$src2), 11017 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 11018 [(set _dst.RC:$dst,(_dst.VT 11019 (OpNode (_src.VT _src.RC:$src1), 11020 (_src.VT _src.RC:$src2))))]>, 11021 Sched<[sched]>; 11022 def rm : AVX512BI<opc, MRMSrcMem, 11023 (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.MemOp:$src2), 11024 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 11025 [(set _dst.RC:$dst,(_dst.VT 11026 (OpNode (_src.VT _src.RC:$src1), 11027 (_src.VT (bitconvert 11028 (_src.LdFrag addr:$src2))))))]>, 11029 Sched<[sched.Folded, sched.ReadAfterFold]>; 11030} 11031 11032multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode, 11033 string OpcodeStr, X86SchedWriteWidths sched, 11034 Predicate prd> { 11035 let Predicates = [prd] in 11036 defm Z : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.ZMM, 11037 v8i64_info, v64i8_info>, EVEX_V512; 11038 let Predicates = [prd, HasVLX] in { 11039 defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.YMM, 11040 v4i64x_info, v32i8x_info>, EVEX_V256; 11041 defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.XMM, 11042 v2i64x_info, v16i8x_info>, EVEX_V128; 11043 } 11044} 11045 11046defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw", 11047 SchedWritePSADBW, HasBWI>, EVEX_4V, VEX_WIG; 11048 11049// Transforms to swizzle an immediate to enable better matching when 11050// memory operand isn't in the right place. 11051def VPTERNLOG321_imm8 : SDNodeXForm<timm, [{ 11052 // Convert a VPTERNLOG immediate by swapping operand 0 and operand 2. 11053 uint8_t Imm = N->getZExtValue(); 11054 // Swap bits 1/4 and 3/6. 11055 uint8_t NewImm = Imm & 0xa5; 11056 if (Imm & 0x02) NewImm |= 0x10; 11057 if (Imm & 0x10) NewImm |= 0x02; 11058 if (Imm & 0x08) NewImm |= 0x40; 11059 if (Imm & 0x40) NewImm |= 0x08; 11060 return getI8Imm(NewImm, SDLoc(N)); 11061}]>; 11062def VPTERNLOG213_imm8 : SDNodeXForm<timm, [{ 11063 // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2. 11064 uint8_t Imm = N->getZExtValue(); 11065 // Swap bits 2/4 and 3/5. 11066 uint8_t NewImm = Imm & 0xc3; 11067 if (Imm & 0x04) NewImm |= 0x10; 11068 if (Imm & 0x10) NewImm |= 0x04; 11069 if (Imm & 0x08) NewImm |= 0x20; 11070 if (Imm & 0x20) NewImm |= 0x08; 11071 return getI8Imm(NewImm, SDLoc(N)); 11072}]>; 11073def VPTERNLOG132_imm8 : SDNodeXForm<timm, [{ 11074 // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2. 11075 uint8_t Imm = N->getZExtValue(); 11076 // Swap bits 1/2 and 5/6. 11077 uint8_t NewImm = Imm & 0x99; 11078 if (Imm & 0x02) NewImm |= 0x04; 11079 if (Imm & 0x04) NewImm |= 0x02; 11080 if (Imm & 0x20) NewImm |= 0x40; 11081 if (Imm & 0x40) NewImm |= 0x20; 11082 return getI8Imm(NewImm, SDLoc(N)); 11083}]>; 11084def VPTERNLOG231_imm8 : SDNodeXForm<timm, [{ 11085 // Convert a VPTERNLOG immediate by moving operand 1 to the end. 11086 uint8_t Imm = N->getZExtValue(); 11087 // Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5 11088 uint8_t NewImm = Imm & 0x81; 11089 if (Imm & 0x02) NewImm |= 0x04; 11090 if (Imm & 0x04) NewImm |= 0x10; 11091 if (Imm & 0x08) NewImm |= 0x40; 11092 if (Imm & 0x10) NewImm |= 0x02; 11093 if (Imm & 0x20) NewImm |= 0x08; 11094 if (Imm & 0x40) NewImm |= 0x20; 11095 return getI8Imm(NewImm, SDLoc(N)); 11096}]>; 11097def VPTERNLOG312_imm8 : SDNodeXForm<timm, [{ 11098 // Convert a VPTERNLOG immediate by moving operand 2 to the beginning. 11099 uint8_t Imm = N->getZExtValue(); 11100 // Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3 11101 uint8_t NewImm = Imm & 0x81; 11102 if (Imm & 0x02) NewImm |= 0x10; 11103 if (Imm & 0x04) NewImm |= 0x02; 11104 if (Imm & 0x08) NewImm |= 0x20; 11105 if (Imm & 0x10) NewImm |= 0x04; 11106 if (Imm & 0x20) NewImm |= 0x40; 11107 if (Imm & 0x40) NewImm |= 0x08; 11108 return getI8Imm(NewImm, SDLoc(N)); 11109}]>; 11110 11111multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode, 11112 X86FoldableSchedWrite sched, X86VectorVTInfo _, 11113 string Name>{ 11114 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in { 11115 defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), 11116 (ins _.RC:$src2, _.RC:$src3, u8imm:$src4), 11117 OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4", 11118 (OpNode (_.VT _.RC:$src1), 11119 (_.VT _.RC:$src2), 11120 (_.VT _.RC:$src3), 11121 (i8 timm:$src4)), 1, 1>, 11122 AVX512AIi8Base, EVEX_4V, Sched<[sched]>; 11123 defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 11124 (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4), 11125 OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4", 11126 (OpNode (_.VT _.RC:$src1), 11127 (_.VT _.RC:$src2), 11128 (_.VT (bitconvert (_.LdFrag addr:$src3))), 11129 (i8 timm:$src4)), 1, 0>, 11130 AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, 11131 Sched<[sched.Folded, sched.ReadAfterFold]>; 11132 defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 11133 (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4), 11134 OpcodeStr, "$src4, ${src3}"#_.BroadcastStr#", $src2", 11135 "$src2, ${src3}"#_.BroadcastStr#", $src4", 11136 (OpNode (_.VT _.RC:$src1), 11137 (_.VT _.RC:$src2), 11138 (_.VT (_.BroadcastLdFrag addr:$src3)), 11139 (i8 timm:$src4)), 1, 0>, EVEX_B, 11140 AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, 11141 Sched<[sched.Folded, sched.ReadAfterFold]>; 11142 }// Constraints = "$src1 = $dst" 11143 11144 // Additional patterns for matching passthru operand in other positions. 11145 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11146 (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 timm:$src4)), 11147 _.RC:$src1)), 11148 (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask, 11149 _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 timm:$src4))>; 11150 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11151 (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 timm:$src4)), 11152 _.RC:$src1)), 11153 (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask, 11154 _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 timm:$src4))>; 11155 11156 // Additional patterns for matching zero masking with loads in other 11157 // positions. 11158 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11159 (OpNode (bitconvert (_.LdFrag addr:$src3)), 11160 _.RC:$src2, _.RC:$src1, (i8 timm:$src4)), 11161 _.ImmAllZerosV)), 11162 (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask, 11163 _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>; 11164 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11165 (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)), 11166 _.RC:$src2, (i8 timm:$src4)), 11167 _.ImmAllZerosV)), 11168 (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask, 11169 _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>; 11170 11171 // Additional patterns for matching masked loads with different 11172 // operand orders. 11173 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11174 (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)), 11175 _.RC:$src2, (i8 timm:$src4)), 11176 _.RC:$src1)), 11177 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, 11178 _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>; 11179 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11180 (OpNode (bitconvert (_.LdFrag addr:$src3)), 11181 _.RC:$src2, _.RC:$src1, (i8 timm:$src4)), 11182 _.RC:$src1)), 11183 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, 11184 _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>; 11185 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11186 (OpNode _.RC:$src2, _.RC:$src1, 11187 (bitconvert (_.LdFrag addr:$src3)), (i8 timm:$src4)), 11188 _.RC:$src1)), 11189 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, 11190 _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>; 11191 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11192 (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)), 11193 _.RC:$src1, (i8 timm:$src4)), 11194 _.RC:$src1)), 11195 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, 11196 _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>; 11197 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11198 (OpNode (bitconvert (_.LdFrag addr:$src3)), 11199 _.RC:$src1, _.RC:$src2, (i8 timm:$src4)), 11200 _.RC:$src1)), 11201 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, 11202 _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>; 11203 11204 // Additional patterns for matching zero masking with broadcasts in other 11205 // positions. 11206 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11207 (OpNode (_.BroadcastLdFrag addr:$src3), 11208 _.RC:$src2, _.RC:$src1, (i8 timm:$src4)), 11209 _.ImmAllZerosV)), 11210 (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1, 11211 _.KRCWM:$mask, _.RC:$src2, addr:$src3, 11212 (VPTERNLOG321_imm8 timm:$src4))>; 11213 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11214 (OpNode _.RC:$src1, 11215 (_.BroadcastLdFrag addr:$src3), 11216 _.RC:$src2, (i8 timm:$src4)), 11217 _.ImmAllZerosV)), 11218 (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1, 11219 _.KRCWM:$mask, _.RC:$src2, addr:$src3, 11220 (VPTERNLOG132_imm8 timm:$src4))>; 11221 11222 // Additional patterns for matching masked broadcasts with different 11223 // operand orders. 11224 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11225 (OpNode _.RC:$src1, (_.BroadcastLdFrag addr:$src3), 11226 _.RC:$src2, (i8 timm:$src4)), 11227 _.RC:$src1)), 11228 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, 11229 _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>; 11230 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11231 (OpNode (_.BroadcastLdFrag addr:$src3), 11232 _.RC:$src2, _.RC:$src1, (i8 timm:$src4)), 11233 _.RC:$src1)), 11234 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, 11235 _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>; 11236 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11237 (OpNode _.RC:$src2, _.RC:$src1, 11238 (_.BroadcastLdFrag addr:$src3), 11239 (i8 timm:$src4)), _.RC:$src1)), 11240 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, 11241 _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>; 11242 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11243 (OpNode _.RC:$src2, 11244 (_.BroadcastLdFrag addr:$src3), 11245 _.RC:$src1, (i8 timm:$src4)), 11246 _.RC:$src1)), 11247 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, 11248 _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>; 11249 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11250 (OpNode (_.BroadcastLdFrag addr:$src3), 11251 _.RC:$src1, _.RC:$src2, (i8 timm:$src4)), 11252 _.RC:$src1)), 11253 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, 11254 _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>; 11255} 11256 11257multiclass avx512_common_ternlog<string OpcodeStr, X86SchedWriteWidths sched, 11258 AVX512VLVectorVTInfo _> { 11259 let Predicates = [HasAVX512] in 11260 defm Z : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.ZMM, 11261 _.info512, NAME>, EVEX_V512; 11262 let Predicates = [HasAVX512, HasVLX] in { 11263 defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.XMM, 11264 _.info128, NAME>, EVEX_V128; 11265 defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.YMM, 11266 _.info256, NAME>, EVEX_V256; 11267 } 11268} 11269 11270defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SchedWriteVecALU, 11271 avx512vl_i32_info>; 11272defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU, 11273 avx512vl_i64_info>, VEX_W; 11274 11275// Patterns to implement vnot using vpternlog instead of creating all ones 11276// using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen 11277// so that the result is only dependent on src0. But we use the same source 11278// for all operands to prevent a false dependency. 11279// TODO: We should maybe have a more generalized algorithm for folding to 11280// vpternlog. 11281let Predicates = [HasAVX512] in { 11282 def : Pat<(v64i8 (vnot VR512:$src)), 11283 (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>; 11284 def : Pat<(v32i16 (vnot VR512:$src)), 11285 (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>; 11286 def : Pat<(v16i32 (vnot VR512:$src)), 11287 (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>; 11288 def : Pat<(v8i64 (vnot VR512:$src)), 11289 (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>; 11290} 11291 11292let Predicates = [HasAVX512, NoVLX] in { 11293 def : Pat<(v16i8 (vnot VR128X:$src)), 11294 (EXTRACT_SUBREG 11295 (VPTERNLOGQZrri 11296 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 11297 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 11298 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 11299 (i8 15)), sub_xmm)>; 11300 def : Pat<(v8i16 (vnot VR128X:$src)), 11301 (EXTRACT_SUBREG 11302 (VPTERNLOGQZrri 11303 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 11304 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 11305 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 11306 (i8 15)), sub_xmm)>; 11307 def : Pat<(v4i32 (vnot VR128X:$src)), 11308 (EXTRACT_SUBREG 11309 (VPTERNLOGQZrri 11310 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 11311 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 11312 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 11313 (i8 15)), sub_xmm)>; 11314 def : Pat<(v2i64 (vnot VR128X:$src)), 11315 (EXTRACT_SUBREG 11316 (VPTERNLOGQZrri 11317 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 11318 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 11319 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 11320 (i8 15)), sub_xmm)>; 11321 11322 def : Pat<(v32i8 (vnot VR256X:$src)), 11323 (EXTRACT_SUBREG 11324 (VPTERNLOGQZrri 11325 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 11326 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 11327 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 11328 (i8 15)), sub_ymm)>; 11329 def : Pat<(v16i16 (vnot VR256X:$src)), 11330 (EXTRACT_SUBREG 11331 (VPTERNLOGQZrri 11332 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 11333 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 11334 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 11335 (i8 15)), sub_ymm)>; 11336 def : Pat<(v8i32 (vnot VR256X:$src)), 11337 (EXTRACT_SUBREG 11338 (VPTERNLOGQZrri 11339 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 11340 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 11341 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 11342 (i8 15)), sub_ymm)>; 11343 def : Pat<(v4i64 (vnot VR256X:$src)), 11344 (EXTRACT_SUBREG 11345 (VPTERNLOGQZrri 11346 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 11347 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 11348 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 11349 (i8 15)), sub_ymm)>; 11350} 11351 11352let Predicates = [HasVLX] in { 11353 def : Pat<(v16i8 (vnot VR128X:$src)), 11354 (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>; 11355 def : Pat<(v8i16 (vnot VR128X:$src)), 11356 (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>; 11357 def : Pat<(v4i32 (vnot VR128X:$src)), 11358 (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>; 11359 def : Pat<(v2i64 (vnot VR128X:$src)), 11360 (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>; 11361 11362 def : Pat<(v32i8 (vnot VR256X:$src)), 11363 (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>; 11364 def : Pat<(v16i16 (vnot VR256X:$src)), 11365 (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>; 11366 def : Pat<(v8i32 (vnot VR256X:$src)), 11367 (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>; 11368 def : Pat<(v4i64 (vnot VR256X:$src)), 11369 (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>; 11370} 11371 11372//===----------------------------------------------------------------------===// 11373// AVX-512 - FixupImm 11374//===----------------------------------------------------------------------===// 11375 11376multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr, 11377 X86FoldableSchedWrite sched, X86VectorVTInfo _, 11378 X86VectorVTInfo TblVT>{ 11379 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, 11380 Uses = [MXCSR], mayRaiseFPException = 1 in { 11381 defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), 11382 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4), 11383 OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4", 11384 (X86VFixupimm (_.VT _.RC:$src1), 11385 (_.VT _.RC:$src2), 11386 (TblVT.VT _.RC:$src3), 11387 (i32 timm:$src4))>, Sched<[sched]>; 11388 defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 11389 (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4), 11390 OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4", 11391 (X86VFixupimm (_.VT _.RC:$src1), 11392 (_.VT _.RC:$src2), 11393 (TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))), 11394 (i32 timm:$src4))>, 11395 Sched<[sched.Folded, sched.ReadAfterFold]>; 11396 defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 11397 (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4), 11398 OpcodeStr#_.Suffix, "$src4, ${src3}"#_.BroadcastStr#", $src2", 11399 "$src2, ${src3}"#_.BroadcastStr#", $src4", 11400 (X86VFixupimm (_.VT _.RC:$src1), 11401 (_.VT _.RC:$src2), 11402 (TblVT.VT (TblVT.BroadcastLdFrag addr:$src3)), 11403 (i32 timm:$src4))>, 11404 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 11405 } // Constraints = "$src1 = $dst" 11406} 11407 11408multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr, 11409 X86FoldableSchedWrite sched, 11410 X86VectorVTInfo _, X86VectorVTInfo TblVT> 11411 : avx512_fixupimm_packed<opc, OpcodeStr, sched, _, TblVT> { 11412let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, Uses = [MXCSR] in { 11413 defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), 11414 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4), 11415 OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2", 11416 "$src2, $src3, {sae}, $src4", 11417 (X86VFixupimmSAE (_.VT _.RC:$src1), 11418 (_.VT _.RC:$src2), 11419 (TblVT.VT _.RC:$src3), 11420 (i32 timm:$src4))>, 11421 EVEX_B, Sched<[sched]>; 11422 } 11423} 11424 11425multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr, 11426 X86FoldableSchedWrite sched, X86VectorVTInfo _, 11427 X86VectorVTInfo _src3VT> { 11428 let Constraints = "$src1 = $dst" , Predicates = [HasAVX512], 11429 ExeDomain = _.ExeDomain in { 11430 defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 11431 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4), 11432 OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4", 11433 (X86VFixupimms (_.VT _.RC:$src1), 11434 (_.VT _.RC:$src2), 11435 (_src3VT.VT _src3VT.RC:$src3), 11436 (i32 timm:$src4))>, Sched<[sched]>, SIMD_EXC; 11437 let Uses = [MXCSR] in 11438 defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 11439 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4), 11440 OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2", 11441 "$src2, $src3, {sae}, $src4", 11442 (X86VFixupimmSAEs (_.VT _.RC:$src1), 11443 (_.VT _.RC:$src2), 11444 (_src3VT.VT _src3VT.RC:$src3), 11445 (i32 timm:$src4))>, 11446 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 11447 defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), 11448 (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4), 11449 OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4", 11450 (X86VFixupimms (_.VT _.RC:$src1), 11451 (_.VT _.RC:$src2), 11452 (_src3VT.VT (scalar_to_vector 11453 (_src3VT.ScalarLdFrag addr:$src3))), 11454 (i32 timm:$src4))>, 11455 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; 11456 } 11457} 11458 11459multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched, 11460 AVX512VLVectorVTInfo _Vec, 11461 AVX512VLVectorVTInfo _Tbl> { 11462 let Predicates = [HasAVX512] in 11463 defm Z : avx512_fixupimm_packed_sae<0x54, "vfixupimm", sched.ZMM, 11464 _Vec.info512, _Tbl.info512>, AVX512AIi8Base, 11465 EVEX_4V, EVEX_V512; 11466 let Predicates = [HasAVX512, HasVLX] in { 11467 defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.XMM, 11468 _Vec.info128, _Tbl.info128>, AVX512AIi8Base, 11469 EVEX_4V, EVEX_V128; 11470 defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.YMM, 11471 _Vec.info256, _Tbl.info256>, AVX512AIi8Base, 11472 EVEX_4V, EVEX_V256; 11473 } 11474} 11475 11476defm VFIXUPIMMSSZ : avx512_fixupimm_scalar<0x55, "vfixupimm", 11477 SchedWriteFAdd.Scl, f32x_info, v4i32x_info>, 11478 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>; 11479defm VFIXUPIMMSDZ : avx512_fixupimm_scalar<0x55, "vfixupimm", 11480 SchedWriteFAdd.Scl, f64x_info, v2i64x_info>, 11481 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W; 11482defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f32_info, 11483 avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; 11484defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f64_info, 11485 avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W; 11486 11487// Patterns used to select SSE scalar fp arithmetic instructions from 11488// either: 11489// 11490// (1) a scalar fp operation followed by a blend 11491// 11492// The effect is that the backend no longer emits unnecessary vector 11493// insert instructions immediately after SSE scalar fp instructions 11494// like addss or mulss. 11495// 11496// For example, given the following code: 11497// __m128 foo(__m128 A, __m128 B) { 11498// A[0] += B[0]; 11499// return A; 11500// } 11501// 11502// Previously we generated: 11503// addss %xmm0, %xmm1 11504// movss %xmm1, %xmm0 11505// 11506// We now generate: 11507// addss %xmm1, %xmm0 11508// 11509// (2) a vector packed single/double fp operation followed by a vector insert 11510// 11511// The effect is that the backend converts the packed fp instruction 11512// followed by a vector insert into a single SSE scalar fp instruction. 11513// 11514// For example, given the following code: 11515// __m128 foo(__m128 A, __m128 B) { 11516// __m128 C = A + B; 11517// return (__m128) {c[0], a[1], a[2], a[3]}; 11518// } 11519// 11520// Previously we generated: 11521// addps %xmm0, %xmm1 11522// movss %xmm1, %xmm0 11523// 11524// We now generate: 11525// addss %xmm1, %xmm0 11526 11527// TODO: Some canonicalization in lowering would simplify the number of 11528// patterns we have to try to match. 11529multiclass AVX512_scalar_math_fp_patterns<SDPatternOperator Op, SDNode MaskedOp, 11530 string OpcPrefix, SDNode MoveNode, 11531 X86VectorVTInfo _, PatLeaf ZeroFP> { 11532 let Predicates = [HasAVX512] in { 11533 // extracted scalar math op with insert via movss 11534 def : Pat<(MoveNode 11535 (_.VT VR128X:$dst), 11536 (_.VT (scalar_to_vector 11537 (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))), 11538 _.FRC:$src)))), 11539 (!cast<Instruction>("V"#OpcPrefix#"Zrr_Int") _.VT:$dst, 11540 (_.VT (COPY_TO_REGCLASS _.FRC:$src, VR128X)))>; 11541 def : Pat<(MoveNode 11542 (_.VT VR128X:$dst), 11543 (_.VT (scalar_to_vector 11544 (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))), 11545 (_.ScalarLdFrag addr:$src))))), 11546 (!cast<Instruction>("V"#OpcPrefix#"Zrm_Int") _.VT:$dst, addr:$src)>; 11547 11548 // extracted masked scalar math op with insert via movss 11549 def : Pat<(MoveNode (_.VT VR128X:$src1), 11550 (scalar_to_vector 11551 (X86selects_mask VK1WM:$mask, 11552 (MaskedOp (_.EltVT 11553 (extractelt (_.VT VR128X:$src1), (iPTR 0))), 11554 _.FRC:$src2), 11555 _.FRC:$src0))), 11556 (!cast<Instruction>("V"#OpcPrefix#"Zrr_Intk") 11557 (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)), 11558 VK1WM:$mask, _.VT:$src1, 11559 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>; 11560 def : Pat<(MoveNode (_.VT VR128X:$src1), 11561 (scalar_to_vector 11562 (X86selects_mask VK1WM:$mask, 11563 (MaskedOp (_.EltVT 11564 (extractelt (_.VT VR128X:$src1), (iPTR 0))), 11565 (_.ScalarLdFrag addr:$src2)), 11566 _.FRC:$src0))), 11567 (!cast<Instruction>("V"#OpcPrefix#"Zrm_Intk") 11568 (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)), 11569 VK1WM:$mask, _.VT:$src1, addr:$src2)>; 11570 11571 // extracted masked scalar math op with insert via movss 11572 def : Pat<(MoveNode (_.VT VR128X:$src1), 11573 (scalar_to_vector 11574 (X86selects_mask VK1WM:$mask, 11575 (MaskedOp (_.EltVT 11576 (extractelt (_.VT VR128X:$src1), (iPTR 0))), 11577 _.FRC:$src2), (_.EltVT ZeroFP)))), 11578 (!cast<I>("V"#OpcPrefix#"Zrr_Intkz") 11579 VK1WM:$mask, _.VT:$src1, 11580 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>; 11581 def : Pat<(MoveNode (_.VT VR128X:$src1), 11582 (scalar_to_vector 11583 (X86selects_mask VK1WM:$mask, 11584 (MaskedOp (_.EltVT 11585 (extractelt (_.VT VR128X:$src1), (iPTR 0))), 11586 (_.ScalarLdFrag addr:$src2)), (_.EltVT ZeroFP)))), 11587 (!cast<I>("V"#OpcPrefix#"Zrm_Intkz") VK1WM:$mask, _.VT:$src1, addr:$src2)>; 11588 } 11589} 11590 11591defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSS", X86Movss, v4f32x_info, fp32imm0>; 11592defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSS", X86Movss, v4f32x_info, fp32imm0>; 11593defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSS", X86Movss, v4f32x_info, fp32imm0>; 11594defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSS", X86Movss, v4f32x_info, fp32imm0>; 11595 11596defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSD", X86Movsd, v2f64x_info, fp64imm0>; 11597defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSD", X86Movsd, v2f64x_info, fp64imm0>; 11598defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSD", X86Movsd, v2f64x_info, fp64imm0>; 11599defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSD", X86Movsd, v2f64x_info, fp64imm0>; 11600 11601multiclass AVX512_scalar_unary_math_patterns<SDPatternOperator OpNode, string OpcPrefix, 11602 SDNode Move, X86VectorVTInfo _> { 11603 let Predicates = [HasAVX512] in { 11604 def : Pat<(_.VT (Move _.VT:$dst, 11605 (scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))), 11606 (!cast<Instruction>("V"#OpcPrefix#"Zr_Int") _.VT:$dst, _.VT:$src)>; 11607 } 11608} 11609 11610defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSS", X86Movss, v4f32x_info>; 11611defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSD", X86Movsd, v2f64x_info>; 11612 11613//===----------------------------------------------------------------------===// 11614// AES instructions 11615//===----------------------------------------------------------------------===// 11616 11617multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> { 11618 let Predicates = [HasVLX, HasVAES] in { 11619 defm Z128 : AESI_binop_rm_int<Op, OpStr, 11620 !cast<Intrinsic>(IntPrefix), 11621 loadv2i64, 0, VR128X, i128mem>, 11622 EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V128, VEX_WIG; 11623 defm Z256 : AESI_binop_rm_int<Op, OpStr, 11624 !cast<Intrinsic>(IntPrefix#"_256"), 11625 loadv4i64, 0, VR256X, i256mem>, 11626 EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V256, VEX_WIG; 11627 } 11628 let Predicates = [HasAVX512, HasVAES] in 11629 defm Z : AESI_binop_rm_int<Op, OpStr, 11630 !cast<Intrinsic>(IntPrefix#"_512"), 11631 loadv8i64, 0, VR512, i512mem>, 11632 EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_WIG; 11633} 11634 11635defm VAESENC : avx512_vaes<0xDC, "vaesenc", "int_x86_aesni_aesenc">; 11636defm VAESENCLAST : avx512_vaes<0xDD, "vaesenclast", "int_x86_aesni_aesenclast">; 11637defm VAESDEC : avx512_vaes<0xDE, "vaesdec", "int_x86_aesni_aesdec">; 11638defm VAESDECLAST : avx512_vaes<0xDF, "vaesdeclast", "int_x86_aesni_aesdeclast">; 11639 11640//===----------------------------------------------------------------------===// 11641// PCLMUL instructions - Carry less multiplication 11642//===----------------------------------------------------------------------===// 11643 11644let Predicates = [HasAVX512, HasVPCLMULQDQ] in 11645defm VPCLMULQDQZ : vpclmulqdq<VR512, i512mem, loadv8i64, int_x86_pclmulqdq_512>, 11646 EVEX_4V, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_WIG; 11647 11648let Predicates = [HasVLX, HasVPCLMULQDQ] in { 11649defm VPCLMULQDQZ128 : vpclmulqdq<VR128X, i128mem, loadv2i64, int_x86_pclmulqdq>, 11650 EVEX_4V, EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_WIG; 11651 11652defm VPCLMULQDQZ256: vpclmulqdq<VR256X, i256mem, loadv4i64, 11653 int_x86_pclmulqdq_256>, EVEX_4V, EVEX_V256, 11654 EVEX_CD8<64, CD8VF>, VEX_WIG; 11655} 11656 11657// Aliases 11658defm : vpclmulqdq_aliases<"VPCLMULQDQZ", VR512, i512mem>; 11659defm : vpclmulqdq_aliases<"VPCLMULQDQZ128", VR128X, i128mem>; 11660defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>; 11661 11662//===----------------------------------------------------------------------===// 11663// VBMI2 11664//===----------------------------------------------------------------------===// 11665 11666multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode, 11667 X86FoldableSchedWrite sched, X86VectorVTInfo VTI> { 11668 let Constraints = "$src1 = $dst", 11669 ExeDomain = VTI.ExeDomain in { 11670 defm r: AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst), 11671 (ins VTI.RC:$src2, VTI.RC:$src3), OpStr, 11672 "$src3, $src2", "$src2, $src3", 11673 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>, 11674 AVX512FMA3Base, Sched<[sched]>; 11675 defm m: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst), 11676 (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr, 11677 "$src3, $src2", "$src2, $src3", 11678 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, 11679 (VTI.VT (VTI.LdFrag addr:$src3))))>, 11680 AVX512FMA3Base, 11681 Sched<[sched.Folded, sched.ReadAfterFold]>; 11682 } 11683} 11684 11685multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode, 11686 X86FoldableSchedWrite sched, X86VectorVTInfo VTI> 11687 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched, VTI> { 11688 let Constraints = "$src1 = $dst", 11689 ExeDomain = VTI.ExeDomain in 11690 defm mb: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst), 11691 (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr, 11692 "${src3}"#VTI.BroadcastStr#", $src2", 11693 "$src2, ${src3}"#VTI.BroadcastStr, 11694 (OpNode VTI.RC:$src1, VTI.RC:$src2, 11695 (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>, 11696 AVX512FMA3Base, EVEX_B, 11697 Sched<[sched.Folded, sched.ReadAfterFold]>; 11698} 11699 11700multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode, 11701 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> { 11702 let Predicates = [HasVBMI2] in 11703 defm Z : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.ZMM, VTI.info512>, 11704 EVEX_V512; 11705 let Predicates = [HasVBMI2, HasVLX] in { 11706 defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.YMM, VTI.info256>, 11707 EVEX_V256; 11708 defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.XMM, VTI.info128>, 11709 EVEX_V128; 11710 } 11711} 11712 11713multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode, 11714 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> { 11715 let Predicates = [HasVBMI2] in 11716 defm Z : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.ZMM, VTI.info512>, 11717 EVEX_V512; 11718 let Predicates = [HasVBMI2, HasVLX] in { 11719 defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.YMM, VTI.info256>, 11720 EVEX_V256; 11721 defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.XMM, VTI.info128>, 11722 EVEX_V128; 11723 } 11724} 11725multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix, 11726 SDNode OpNode, X86SchedWriteWidths sched> { 11727 defm W : VBMI2_shift_var_rm_common<wOp, Prefix#"w", OpNode, sched, 11728 avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>; 11729 defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix#"d", OpNode, sched, 11730 avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; 11731 defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix#"q", OpNode, sched, 11732 avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>; 11733} 11734 11735multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix, 11736 SDNode OpNode, X86SchedWriteWidths sched> { 11737 defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix#"w", sched, 11738 avx512vl_i16_info, avx512vl_i16_info, HasVBMI2>, 11739 VEX_W, EVEX_CD8<16, CD8VF>; 11740 defm D : avx512_common_3Op_imm8<Prefix#"d", avx512vl_i32_info, dqOp, 11741 OpNode, sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; 11742 defm Q : avx512_common_3Op_imm8<Prefix#"q", avx512vl_i64_info, dqOp, OpNode, 11743 sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; 11744} 11745 11746// Concat & Shift 11747defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SchedWriteVecIMul>; 11748defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SchedWriteVecIMul>; 11749defm VPSHLD : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SchedWriteVecIMul>; 11750defm VPSHRD : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SchedWriteVecIMul>; 11751 11752// Compress 11753defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", WriteVarShuffle256, 11754 avx512vl_i8_info, HasVBMI2>, EVEX, 11755 NotMemoryFoldable; 11756defm VPCOMPRESSW : compress_by_elt_width <0x63, "vpcompressw", WriteVarShuffle256, 11757 avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W, 11758 NotMemoryFoldable; 11759// Expand 11760defm VPEXPANDB : expand_by_elt_width <0x62, "vpexpandb", WriteVarShuffle256, 11761 avx512vl_i8_info, HasVBMI2>, EVEX; 11762defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", WriteVarShuffle256, 11763 avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W; 11764 11765//===----------------------------------------------------------------------===// 11766// VNNI 11767//===----------------------------------------------------------------------===// 11768 11769let Constraints = "$src1 = $dst" in 11770multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode, 11771 X86FoldableSchedWrite sched, X86VectorVTInfo VTI, 11772 bit IsCommutable> { 11773 let ExeDomain = VTI.ExeDomain in { 11774 defm r : AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst), 11775 (ins VTI.RC:$src2, VTI.RC:$src3), OpStr, 11776 "$src3, $src2", "$src2, $src3", 11777 (VTI.VT (OpNode VTI.RC:$src1, 11778 VTI.RC:$src2, VTI.RC:$src3)), 11779 IsCommutable, IsCommutable>, 11780 EVEX_4V, T8PD, Sched<[sched]>; 11781 defm m : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst), 11782 (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr, 11783 "$src3, $src2", "$src2, $src3", 11784 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, 11785 (VTI.VT (VTI.LdFrag addr:$src3))))>, 11786 EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD, 11787 Sched<[sched.Folded, sched.ReadAfterFold]>; 11788 defm mb : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst), 11789 (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), 11790 OpStr, "${src3}"#VTI.BroadcastStr#", $src2", 11791 "$src2, ${src3}"#VTI.BroadcastStr, 11792 (OpNode VTI.RC:$src1, VTI.RC:$src2, 11793 (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>, 11794 EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B, 11795 T8PD, Sched<[sched.Folded, sched.ReadAfterFold]>; 11796 } 11797} 11798 11799multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode, 11800 X86SchedWriteWidths sched, bit IsCommutable> { 11801 let Predicates = [HasVNNI] in 11802 defm Z : VNNI_rmb<Op, OpStr, OpNode, sched.ZMM, v16i32_info, 11803 IsCommutable>, EVEX_V512; 11804 let Predicates = [HasVNNI, HasVLX] in { 11805 defm Z256 : VNNI_rmb<Op, OpStr, OpNode, sched.YMM, v8i32x_info, 11806 IsCommutable>, EVEX_V256; 11807 defm Z128 : VNNI_rmb<Op, OpStr, OpNode, sched.XMM, v4i32x_info, 11808 IsCommutable>, EVEX_V128; 11809 } 11810} 11811 11812// FIXME: Is there a better scheduler class for VPDP? 11813defm VPDPBUSD : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SchedWriteVecIMul, 0>; 11814defm VPDPBUSDS : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SchedWriteVecIMul, 0>; 11815defm VPDPWSSD : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SchedWriteVecIMul, 1>; 11816defm VPDPWSSDS : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SchedWriteVecIMul, 1>; 11817 11818// Patterns to match VPDPWSSD from existing instructions/intrinsics. 11819let Predicates = [HasVNNI] in { 11820 def : Pat<(v16i32 (add VR512:$src1, 11821 (X86vpmaddwd_su VR512:$src2, VR512:$src3))), 11822 (VPDPWSSDZr VR512:$src1, VR512:$src2, VR512:$src3)>; 11823 def : Pat<(v16i32 (add VR512:$src1, 11824 (X86vpmaddwd_su VR512:$src2, (load addr:$src3)))), 11825 (VPDPWSSDZm VR512:$src1, VR512:$src2, addr:$src3)>; 11826} 11827let Predicates = [HasVNNI,HasVLX] in { 11828 def : Pat<(v8i32 (add VR256X:$src1, 11829 (X86vpmaddwd_su VR256X:$src2, VR256X:$src3))), 11830 (VPDPWSSDZ256r VR256X:$src1, VR256X:$src2, VR256X:$src3)>; 11831 def : Pat<(v8i32 (add VR256X:$src1, 11832 (X86vpmaddwd_su VR256X:$src2, (load addr:$src3)))), 11833 (VPDPWSSDZ256m VR256X:$src1, VR256X:$src2, addr:$src3)>; 11834 def : Pat<(v4i32 (add VR128X:$src1, 11835 (X86vpmaddwd_su VR128X:$src2, VR128X:$src3))), 11836 (VPDPWSSDZ128r VR128X:$src1, VR128X:$src2, VR128X:$src3)>; 11837 def : Pat<(v4i32 (add VR128X:$src1, 11838 (X86vpmaddwd_su VR128X:$src2, (load addr:$src3)))), 11839 (VPDPWSSDZ128m VR128X:$src1, VR128X:$src2, addr:$src3)>; 11840} 11841 11842//===----------------------------------------------------------------------===// 11843// Bit Algorithms 11844//===----------------------------------------------------------------------===// 11845 11846// FIXME: Is there a better scheduler class for VPOPCNTB/VPOPCNTW? 11847defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, SchedWriteVecALU, 11848 avx512vl_i8_info, HasBITALG>; 11849defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SchedWriteVecALU, 11850 avx512vl_i16_info, HasBITALG>, VEX_W; 11851 11852defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>; 11853defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>; 11854 11855def X86Vpshufbitqmb_su : PatFrag<(ops node:$src1, node:$src2), 11856 (X86Vpshufbitqmb node:$src1, node:$src2), [{ 11857 return N->hasOneUse(); 11858}]>; 11859 11860multiclass VPSHUFBITQMB_rm<X86FoldableSchedWrite sched, X86VectorVTInfo VTI> { 11861 defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst), 11862 (ins VTI.RC:$src1, VTI.RC:$src2), 11863 "vpshufbitqmb", 11864 "$src2, $src1", "$src1, $src2", 11865 (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1), 11866 (VTI.VT VTI.RC:$src2)), 11867 (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1), 11868 (VTI.VT VTI.RC:$src2))>, EVEX_4V, T8PD, 11869 Sched<[sched]>; 11870 defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst), 11871 (ins VTI.RC:$src1, VTI.MemOp:$src2), 11872 "vpshufbitqmb", 11873 "$src2, $src1", "$src1, $src2", 11874 (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1), 11875 (VTI.VT (VTI.LdFrag addr:$src2))), 11876 (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1), 11877 (VTI.VT (VTI.LdFrag addr:$src2)))>, 11878 EVEX_4V, EVEX_CD8<8, CD8VF>, T8PD, 11879 Sched<[sched.Folded, sched.ReadAfterFold]>; 11880} 11881 11882multiclass VPSHUFBITQMB_common<X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> { 11883 let Predicates = [HasBITALG] in 11884 defm Z : VPSHUFBITQMB_rm<sched.ZMM, VTI.info512>, EVEX_V512; 11885 let Predicates = [HasBITALG, HasVLX] in { 11886 defm Z256 : VPSHUFBITQMB_rm<sched.YMM, VTI.info256>, EVEX_V256; 11887 defm Z128 : VPSHUFBITQMB_rm<sched.XMM, VTI.info128>, EVEX_V128; 11888 } 11889} 11890 11891// FIXME: Is there a better scheduler class for VPSHUFBITQMB? 11892defm VPSHUFBITQMB : VPSHUFBITQMB_common<SchedWriteVecIMul, avx512vl_i8_info>; 11893 11894//===----------------------------------------------------------------------===// 11895// GFNI 11896//===----------------------------------------------------------------------===// 11897 11898multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode, 11899 X86SchedWriteWidths sched> { 11900 let Predicates = [HasGFNI, HasAVX512, HasBWI] in 11901 defm Z : avx512_binop_rm<Op, OpStr, OpNode, v64i8_info, sched.ZMM, 1>, 11902 EVEX_V512; 11903 let Predicates = [HasGFNI, HasVLX, HasBWI] in { 11904 defm Z256 : avx512_binop_rm<Op, OpStr, OpNode, v32i8x_info, sched.YMM, 1>, 11905 EVEX_V256; 11906 defm Z128 : avx512_binop_rm<Op, OpStr, OpNode, v16i8x_info, sched.XMM, 1>, 11907 EVEX_V128; 11908 } 11909} 11910 11911defm VGF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb, 11912 SchedWriteVecALU>, 11913 EVEX_CD8<8, CD8VF>, T8PD; 11914 11915multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode, 11916 X86FoldableSchedWrite sched, X86VectorVTInfo VTI, 11917 X86VectorVTInfo BcstVTI> 11918 : avx512_3Op_rm_imm8<Op, OpStr, OpNode, sched, VTI, VTI> { 11919 let ExeDomain = VTI.ExeDomain in 11920 defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst), 11921 (ins VTI.RC:$src1, VTI.ScalarMemOp:$src2, u8imm:$src3), 11922 OpStr, "$src3, ${src2}"#BcstVTI.BroadcastStr#", $src1", 11923 "$src1, ${src2}"#BcstVTI.BroadcastStr#", $src3", 11924 (OpNode (VTI.VT VTI.RC:$src1), 11925 (bitconvert (BcstVTI.VT (X86VBroadcastld64 addr:$src2))), 11926 (i8 timm:$src3))>, EVEX_B, 11927 Sched<[sched.Folded, sched.ReadAfterFold]>; 11928} 11929 11930multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode, 11931 X86SchedWriteWidths sched> { 11932 let Predicates = [HasGFNI, HasAVX512, HasBWI] in 11933 defm Z : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.ZMM, 11934 v64i8_info, v8i64_info>, EVEX_V512; 11935 let Predicates = [HasGFNI, HasVLX, HasBWI] in { 11936 defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.YMM, 11937 v32i8x_info, v4i64x_info>, EVEX_V256; 11938 defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.XMM, 11939 v16i8x_info, v2i64x_info>, EVEX_V128; 11940 } 11941} 11942 11943defm VGF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb", 11944 X86GF2P8affineinvqb, SchedWriteVecIMul>, 11945 EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base; 11946defm VGF2P8AFFINEQB : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb", 11947 X86GF2P8affineqb, SchedWriteVecIMul>, 11948 EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base; 11949 11950 11951//===----------------------------------------------------------------------===// 11952// AVX5124FMAPS 11953//===----------------------------------------------------------------------===// 11954 11955let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedSingle, 11956 Constraints = "$src1 = $dst", Uses = [MXCSR], mayRaiseFPException = 1 in { 11957defm V4FMADDPSrm : AVX512_maskable_3src_in_asm<0x9A, MRMSrcMem, v16f32_info, 11958 (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3), 11959 "v4fmaddps", "$src3, $src2", "$src2, $src3", 11960 []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>, 11961 Sched<[SchedWriteFMA.ZMM.Folded]>; 11962 11963defm V4FNMADDPSrm : AVX512_maskable_3src_in_asm<0xAA, MRMSrcMem, v16f32_info, 11964 (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3), 11965 "v4fnmaddps", "$src3, $src2", "$src2, $src3", 11966 []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>, 11967 Sched<[SchedWriteFMA.ZMM.Folded]>; 11968 11969defm V4FMADDSSrm : AVX512_maskable_3src_in_asm<0x9B, MRMSrcMem, f32x_info, 11970 (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3), 11971 "v4fmaddss", "$src3, $src2", "$src2, $src3", 11972 []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>, 11973 Sched<[SchedWriteFMA.Scl.Folded]>; 11974 11975defm V4FNMADDSSrm : AVX512_maskable_3src_in_asm<0xAB, MRMSrcMem, f32x_info, 11976 (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3), 11977 "v4fnmaddss", "$src3, $src2", "$src2, $src3", 11978 []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>, 11979 Sched<[SchedWriteFMA.Scl.Folded]>; 11980} 11981 11982//===----------------------------------------------------------------------===// 11983// AVX5124VNNIW 11984//===----------------------------------------------------------------------===// 11985 11986let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedInt, 11987 Constraints = "$src1 = $dst" in { 11988defm VP4DPWSSDrm : AVX512_maskable_3src_in_asm<0x52, MRMSrcMem, v16i32_info, 11989 (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3), 11990 "vp4dpwssd", "$src3, $src2", "$src2, $src3", 11991 []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>, 11992 Sched<[SchedWriteFMA.ZMM.Folded]>; 11993 11994defm VP4DPWSSDSrm : AVX512_maskable_3src_in_asm<0x53, MRMSrcMem, v16i32_info, 11995 (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3), 11996 "vp4dpwssds", "$src3, $src2", "$src2, $src3", 11997 []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>, 11998 Sched<[SchedWriteFMA.ZMM.Folded]>; 11999} 12000 12001let hasSideEffects = 0 in { 12002 let mayStore = 1, SchedRW = [WriteFStoreX] in 12003 def MASKPAIR16STORE : PseudoI<(outs), (ins anymem:$dst, VK16PAIR:$src), []>; 12004 let mayLoad = 1, SchedRW = [WriteFLoadX] in 12005 def MASKPAIR16LOAD : PseudoI<(outs VK16PAIR:$dst), (ins anymem:$src), []>; 12006} 12007 12008//===----------------------------------------------------------------------===// 12009// VP2INTERSECT 12010//===----------------------------------------------------------------------===// 12011 12012multiclass avx512_vp2intersect_modes<X86FoldableSchedWrite sched, X86VectorVTInfo _> { 12013 def rr : I<0x68, MRMSrcReg, 12014 (outs _.KRPC:$dst), 12015 (ins _.RC:$src1, _.RC:$src2), 12016 !strconcat("vp2intersect", _.Suffix, 12017 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 12018 [(set _.KRPC:$dst, (X86vp2intersect 12019 _.RC:$src1, (_.VT _.RC:$src2)))]>, 12020 EVEX_4V, T8XD, Sched<[sched]>; 12021 12022 def rm : I<0x68, MRMSrcMem, 12023 (outs _.KRPC:$dst), 12024 (ins _.RC:$src1, _.MemOp:$src2), 12025 !strconcat("vp2intersect", _.Suffix, 12026 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 12027 [(set _.KRPC:$dst, (X86vp2intersect 12028 _.RC:$src1, (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>, 12029 EVEX_4V, T8XD, EVEX_CD8<_.EltSize, CD8VF>, 12030 Sched<[sched.Folded, sched.ReadAfterFold]>; 12031 12032 def rmb : I<0x68, MRMSrcMem, 12033 (outs _.KRPC:$dst), 12034 (ins _.RC:$src1, _.ScalarMemOp:$src2), 12035 !strconcat("vp2intersect", _.Suffix, "\t{${src2}", _.BroadcastStr, 12036 ", $src1, $dst|$dst, $src1, ${src2}", _.BroadcastStr ,"}"), 12037 [(set _.KRPC:$dst, (X86vp2intersect 12038 _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))]>, 12039 EVEX_4V, T8XD, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, 12040 Sched<[sched.Folded, sched.ReadAfterFold]>; 12041} 12042 12043multiclass avx512_vp2intersect<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> { 12044 let Predicates = [HasAVX512, HasVP2INTERSECT] in 12045 defm Z : avx512_vp2intersect_modes<sched.ZMM, _.info512>, EVEX_V512; 12046 12047 let Predicates = [HasAVX512, HasVP2INTERSECT, HasVLX] in { 12048 defm Z256 : avx512_vp2intersect_modes<sched.YMM, _.info256>, EVEX_V256; 12049 defm Z128 : avx512_vp2intersect_modes<sched.XMM, _.info128>, EVEX_V128; 12050 } 12051} 12052 12053defm VP2INTERSECTD : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i32_info>; 12054defm VP2INTERSECTQ : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i64_info>, VEX_W; 12055 12056multiclass avx512_binop_all2<bits<8> opc, string OpcodeStr, 12057 X86SchedWriteWidths sched, 12058 AVX512VLVectorVTInfo _SrcVTInfo, 12059 AVX512VLVectorVTInfo _DstVTInfo, 12060 SDNode OpNode, Predicate prd, 12061 bit IsCommutable = 0> { 12062 let Predicates = [prd] in 12063 defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode, 12064 _SrcVTInfo.info512, _DstVTInfo.info512, 12065 _SrcVTInfo.info512, IsCommutable>, 12066 EVEX_V512, EVEX_CD8<32, CD8VF>; 12067 let Predicates = [HasVLX, prd] in { 12068 defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode, 12069 _SrcVTInfo.info256, _DstVTInfo.info256, 12070 _SrcVTInfo.info256, IsCommutable>, 12071 EVEX_V256, EVEX_CD8<32, CD8VF>; 12072 defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode, 12073 _SrcVTInfo.info128, _DstVTInfo.info128, 12074 _SrcVTInfo.info128, IsCommutable>, 12075 EVEX_V128, EVEX_CD8<32, CD8VF>; 12076 } 12077} 12078 12079let ExeDomain = SSEPackedSingle in 12080defm VCVTNE2PS2BF16 : avx512_binop_all2<0x72, "vcvtne2ps2bf16", 12081 SchedWriteCvtPD2PS, //FIXME: Should be SchedWriteCvtPS2BF 12082 avx512vl_f32_info, avx512vl_i16_info, 12083 X86cvtne2ps2bf16, HasBF16, 0>, T8XD; 12084 12085// Truncate Float to BFloat16 12086multiclass avx512_cvtps2bf16<bits<8> opc, string OpcodeStr, 12087 X86SchedWriteWidths sched> { 12088 let ExeDomain = SSEPackedSingle in { 12089 let Predicates = [HasBF16], Uses = []<Register>, mayRaiseFPException = 0 in { 12090 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i16x_info, v16f32_info, 12091 X86cvtneps2bf16, X86cvtneps2bf16, sched.ZMM>, EVEX_V512; 12092 } 12093 let Predicates = [HasBF16, HasVLX] in { 12094 let Uses = []<Register>, mayRaiseFPException = 0 in { 12095 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v4f32x_info, 12096 null_frag, null_frag, sched.XMM, "{1to4}", "{x}", f128mem, 12097 VK4WM>, EVEX_V128; 12098 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v8f32x_info, 12099 X86cvtneps2bf16, X86cvtneps2bf16, 12100 sched.YMM, "{1to8}", "{y}">, EVEX_V256; 12101 } 12102 } // Predicates = [HasBF16, HasVLX] 12103 } // ExeDomain = SSEPackedSingle 12104 12105 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}", 12106 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, 12107 VR128X:$src), 0>; 12108 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}", 12109 (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, 12110 f128mem:$src), 0, "intel">; 12111 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}", 12112 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, 12113 VR256X:$src), 0>; 12114 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}", 12115 (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, 12116 f256mem:$src), 0, "intel">; 12117} 12118 12119defm VCVTNEPS2BF16 : avx512_cvtps2bf16<0x72, "vcvtneps2bf16", 12120 SchedWriteCvtPD2PS>, T8XS, 12121 EVEX_CD8<32, CD8VF>; 12122 12123let Predicates = [HasBF16, HasVLX] in { 12124 // Special patterns to allow use of X86mcvtneps2bf16 for masking. Instruction 12125 // patterns have been disabled with null_frag. 12126 def : Pat<(v8i16 (X86cvtneps2bf16 (v4f32 VR128X:$src))), 12127 (VCVTNEPS2BF16Z128rr VR128X:$src)>; 12128 def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), (v8i16 VR128X:$src0), 12129 VK4WM:$mask), 12130 (VCVTNEPS2BF16Z128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src)>; 12131 def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), v8i16x_info.ImmAllZerosV, 12132 VK4WM:$mask), 12133 (VCVTNEPS2BF16Z128rrkz VK4WM:$mask, VR128X:$src)>; 12134 12135 def : Pat<(v8i16 (X86cvtneps2bf16 (loadv4f32 addr:$src))), 12136 (VCVTNEPS2BF16Z128rm addr:$src)>; 12137 def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), (v8i16 VR128X:$src0), 12138 VK4WM:$mask), 12139 (VCVTNEPS2BF16Z128rmk VR128X:$src0, VK4WM:$mask, addr:$src)>; 12140 def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), v8i16x_info.ImmAllZerosV, 12141 VK4WM:$mask), 12142 (VCVTNEPS2BF16Z128rmkz VK4WM:$mask, addr:$src)>; 12143 12144 def : Pat<(v8i16 (X86cvtneps2bf16 (v4f32 12145 (X86VBroadcastld32 addr:$src)))), 12146 (VCVTNEPS2BF16Z128rmb addr:$src)>; 12147 def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)), 12148 (v8i16 VR128X:$src0), VK4WM:$mask), 12149 (VCVTNEPS2BF16Z128rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>; 12150 def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)), 12151 v8i16x_info.ImmAllZerosV, VK4WM:$mask), 12152 (VCVTNEPS2BF16Z128rmbkz VK4WM:$mask, addr:$src)>; 12153} 12154 12155let Constraints = "$src1 = $dst" in { 12156multiclass avx512_dpbf16ps_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, 12157 X86FoldableSchedWrite sched, 12158 X86VectorVTInfo _, X86VectorVTInfo src_v> { 12159 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), 12160 (ins src_v.RC:$src2, src_v.RC:$src3), 12161 OpcodeStr, "$src3, $src2", "$src2, $src3", 12162 (_.VT (OpNode _.RC:$src1, src_v.RC:$src2, src_v.RC:$src3))>, 12163 EVEX_4V, Sched<[sched]>; 12164 12165 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 12166 (ins src_v.RC:$src2, src_v.MemOp:$src3), 12167 OpcodeStr, "$src3, $src2", "$src2, $src3", 12168 (_.VT (OpNode _.RC:$src1, src_v.RC:$src2, 12169 (src_v.LdFrag addr:$src3)))>, EVEX_4V, 12170 Sched<[sched.Folded, sched.ReadAfterFold]>; 12171 12172 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 12173 (ins src_v.RC:$src2, src_v.ScalarMemOp:$src3), 12174 OpcodeStr, 12175 !strconcat("${src3}", _.BroadcastStr,", $src2"), 12176 !strconcat("$src2, ${src3}", _.BroadcastStr), 12177 (_.VT (OpNode _.RC:$src1, src_v.RC:$src2, 12178 (src_v.VT (src_v.BroadcastLdFrag addr:$src3))))>, 12179 EVEX_B, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>; 12180 12181} 12182} // Constraints = "$src1 = $dst" 12183 12184multiclass avx512_dpbf16ps_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode, 12185 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _, 12186 AVX512VLVectorVTInfo src_v, Predicate prd> { 12187 let Predicates = [prd] in { 12188 defm Z : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512, 12189 src_v.info512>, EVEX_V512; 12190 } 12191 let Predicates = [HasVLX, prd] in { 12192 defm Z256 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256, 12193 src_v.info256>, EVEX_V256; 12194 defm Z128 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128, 12195 src_v.info128>, EVEX_V128; 12196 } 12197} 12198 12199let ExeDomain = SSEPackedSingle in 12200defm VDPBF16PS : avx512_dpbf16ps_sizes<0x52, "vdpbf16ps", X86dpbf16ps, SchedWriteFMA, 12201 avx512vl_f32_info, avx512vl_i32_info, 12202 HasBF16>, T8XS, EVEX_CD8<32, CD8VF>; 12203