//===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 AVX512 instruction set, defining the
// instructions, and properties of the instructions which are needed for code
// generation, machine code emission, and analysis.
//
//===----------------------------------------------------------------------===//

// Group template arguments that can be derived from the vector type (EltNum x
// EltVT). These are things like the register class for the writemask, etc.
// The idea is to pass one of these as the template argument rather than the
// individual arguments.
// The template is also used for scalar types, in which case NumElts is 1.
class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
                      string suffix = ""> {
  RegisterClass RC = rc;
  ValueType EltVT = eltvt;
  int NumElts = numelts;

  // Corresponding mask register class.
  RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);

  // Corresponding mask register pair class.
  RegisterOperand KRPC = !if (!gt(NumElts, 16), ?,
                              !cast<RegisterOperand>("VK" # NumElts # "Pair"));

  // Corresponding write-mask register class.
  RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");

  // The mask VT.
  ValueType KVT = !cast<ValueType>("v" # NumElts # "i1");

  // Suffix used in the instruction mnemonic.
  string Suffix = suffix;

  // VTName is a string name for the vector VT. For vector types it will be
  // v # NumElts # EltVT, so for a vector of 8 i32 elements it will be v8i32.
  // Scalar types, where NumElts = 1, are slightly more involved: in that case
  // we build v4f32 or v2f64.
  string VTName = "v" # !if (!eq (NumElts, 1),
                        !if (!eq (EltVT.Size, 32), 4,
                        !if (!eq (EltVT.Size, 64), 2, NumElts)), NumElts) # EltVT;

  // The vector VT.
  ValueType VT = !cast<ValueType>(VTName);

  string EltTypeName = !cast<string>(EltVT);
  // Size of the element type in bits, e.g. 32 for v16i32.
  string EltSizeName = !subst("i", "", !subst("f", "", EltTypeName));
  int EltSize = EltVT.Size;

  // "i" for integer types and "f" for floating-point types.
  string TypeVariantName = !subst(EltSizeName, "", EltTypeName);

  // Size of RC in bits, e.g. 512 for VR512.
  int Size = VT.Size;

  // The corresponding memory operand, e.g. i512mem for VR512.
  X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
  X86MemOperand ScalarMemOp = !cast<X86MemOperand>(EltVT # "mem");
  // FP scalar memory operand for intrinsics - ssmem/sdmem.
  Operand IntScalarMemOp = !if (!eq (EltTypeName, "f32"), !cast<Operand>("ssmem"),
                           !if (!eq (EltTypeName, "f64"), !cast<Operand>("sdmem"), ?));

  // Load patterns
  PatFrag LdFrag = !cast<PatFrag>("load" # VTName);

  PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" # VTName);

  PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);
  PatFrag BroadcastLdFrag = !cast<PatFrag>("X86VBroadcastld" # EltSizeName);

  PatFrags ScalarIntMemFrags = !if (!eq (EltTypeName, "f32"),
                                    !cast<PatFrags>("sse_load_f32"),
                               !if (!eq (EltTypeName, "f64"),
                                    !cast<PatFrags>("sse_load_f64"),
                                    ?));

  // The string to specify embedded broadcast in assembly.
  string BroadcastStr = "{1to" # NumElts # "}";

  // 8-bit compressed displacement tuple/subvector format. This is only
  // defined for NumElts <= 8.
  CD8VForm CD8TupleForm = !if (!eq (!srl(NumElts, 4), 0),
                               !cast<CD8VForm>("CD8VT" # NumElts), ?);

  SubRegIndex SubRegIdx = !if (!eq (Size, 128), sub_xmm,
                          !if (!eq (Size, 256), sub_ymm, ?));

  Domain ExeDomain = !if (!eq (EltTypeName, "f32"), SSEPackedSingle,
                     !if (!eq (EltTypeName, "f64"), SSEPackedDouble,
                          SSEPackedInt));

  RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X, FR64X);

  dag ImmAllZerosV = (VT immAllZerosV);

  string ZSuffix = !if (!eq (Size, 128), "Z128",
                   !if (!eq (Size, 256), "Z256", "Z"));
}
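
// For illustration, a sketch of what the class above computes for one
// instantiation (every value below follows from the field definitions):
// X86VectorVTInfo<16, i32, VR512, "d"> yields
//   VT = v16i32, KRC = VK16, KRCWM = VK16WM, KVT = v16i1,
//   EltSizeName = "32", TypeVariantName = "i", MemOp = i512mem,
//   BroadcastStr = "{1to16}", ExeDomain = SSEPackedInt, ZSuffix = "Z".
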
def v64i8_info  : X86VectorVTInfo<64,  i8, VR512, "b">;
def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">;
def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
def v8i64_info  : X86VectorVTInfo<8,  i64, VR512, "q">;
def v16f32_info : X86VectorVTInfo<16, f32, VR512, "ps">;
def v8f64_info  : X86VectorVTInfo<8,  f64, VR512, "pd">;

// "x" in v32i8x_info means RC = VR256X
def v32i8x_info  : X86VectorVTInfo<32,  i8, VR256X, "b">;
def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
def v8i32x_info  : X86VectorVTInfo<8,  i32, VR256X, "d">;
def v4i64x_info  : X86VectorVTInfo<4,  i64, VR256X, "q">;
def v8f32x_info  : X86VectorVTInfo<8,  f32, VR256X, "ps">;
def v4f64x_info  : X86VectorVTInfo<4,  f64, VR256X, "pd">;

def v16i8x_info  : X86VectorVTInfo<16,  i8, VR128X, "b">;
def v8i16x_info  : X86VectorVTInfo<8,  i16, VR128X, "w">;
def v4i32x_info  : X86VectorVTInfo<4,  i32, VR128X, "d">;
def v2i64x_info  : X86VectorVTInfo<2,  i64, VR128X, "q">;
def v4f32x_info  : X86VectorVTInfo<4,  f32, VR128X, "ps">;
def v2f64x_info  : X86VectorVTInfo<2,  f64, VR128X, "pd">;

// We map scalar types to the smallest (128-bit) vector type
// with the appropriate element type. This allows us to use the same masking
// logic.
def i32x_info : X86VectorVTInfo<1, i32, GR32, "si">;
def i64x_info : X86VectorVTInfo<1, i64, GR64, "sq">;
def f32x_info : X86VectorVTInfo<1, f32, VR128X, "ss">;
def f64x_info : X86VectorVTInfo<1, f64, VR128X, "sd">;

class AVX512VLVectorVTInfo<X86VectorVTInfo i512, X86VectorVTInfo i256,
                           X86VectorVTInfo i128> {
  X86VectorVTInfo info512 = i512;
  X86VectorVTInfo info256 = i256;
  X86VectorVTInfo info128 = i128;
}

def avx512vl_i8_info  : AVX512VLVectorVTInfo<v64i8_info, v32i8x_info,
                                             v16i8x_info>;
def avx512vl_i16_info : AVX512VLVectorVTInfo<v32i16_info, v16i16x_info,
                                             v8i16x_info>;
def avx512vl_i32_info : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info,
                                             v4i32x_info>;
def avx512vl_i64_info : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info,
                                             v2i64x_info>;
def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info,
                                             v4f32x_info>;
def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info,
                                             v2f64x_info>;

class X86KVectorVTInfo<RegisterClass _krc, RegisterClass _krcwm,
                       ValueType _vt> {
  RegisterClass KRC = _krc;
  RegisterClass KRCWM = _krcwm;
  ValueType KVT = _vt;
}

def v1i1_info  : X86KVectorVTInfo<VK1,  VK1WM,  v1i1>;
def v2i1_info  : X86KVectorVTInfo<VK2,  VK2WM,  v2i1>;
def v4i1_info  : X86KVectorVTInfo<VK4,  VK4WM,  v4i1>;
def v8i1_info  : X86KVectorVTInfo<VK8,  VK8WM,  v8i1>;
def v16i1_info : X86KVectorVTInfo<VK16, VK16WM, v16i1>;
def v32i1_info : X86KVectorVTInfo<VK32, VK32WM, v32i1>;
def v64i1_info : X86KVectorVTInfo<VK64, VK64WM, v64i1>;

// Used for matching masked operations. Ensures the operation part only has a
// single use.
def vselect_mask : PatFrag<(ops node:$mask, node:$src1, node:$src2),
                           (vselect node:$mask, node:$src1, node:$src2), [{
  return isProfitableToFormMaskedOp(N);
}]>;

def X86selects_mask : PatFrag<(ops node:$mask, node:$src1, node:$src2),
                              (X86selects node:$mask, node:$src1, node:$src2), [{
  return isProfitableToFormMaskedOp(N);
}]>;

// This multiclass generates the masking variants from the non-masking
// variant. It only provides the assembly pieces for the masking variants.
// It assumes custom ISel patterns for masking which can be provided as
// template arguments.
multiclass AVX512_maskable_custom<bits<8> O, Format F,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern,
                                  list<dag> MaskingPattern,
                                  list<dag> ZeroMaskingPattern,
                                  string MaskingConstraint = "",
                                  bit IsCommutable = 0,
                                  bit IsKCommutable = 0,
                                  bit IsKZCommutable = IsCommutable> {
  let isCommutable = IsCommutable in
  def NAME: AVX512<O, F, Outs, Ins,
                   OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
                                  "$dst, "#IntelSrcAsm#"}",
                   Pattern>;

  // Prefer over VMOV*rrk Pat<>
  let isCommutable = IsKCommutable in
  def NAME#k: AVX512<O, F, Outs, MaskingIns,
                     OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
                                    "$dst {${mask}}, "#IntelSrcAsm#"}",
                     MaskingPattern>,
              EVEX_K {
    // In case of the 3src subclass this is overridden with a let.
    string Constraints = MaskingConstraint;
  }

  // Zero-masking does not add any restrictions to the operand-commuting
  // transformation, so it is OK to use IsCommutable instead of IsKCommutable.
  let isCommutable = IsKZCommutable in // Prefer over VMOV*rrkz Pat<>
  def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
                      OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
                                     "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
                      ZeroMaskingPattern>,
               EVEX_KZ;
}
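
// As an illustration (not tied to any particular instruction in this file),
// for an opcode string like "vaddps" the three variants defined above
// correspond to the following AT&T assembly forms:
//   vaddps %zmm2, %zmm1, %zmm0             # unmasked
//   vaddps %zmm2, %zmm1, %zmm0 {%k1}       # merge-masking (EVEX_K)
//   vaddps %zmm2, %zmm1, %zmm0 {%k1} {z}   # zero-masking (EVEX_KZ)
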

// Common base class of AVX512_maskable and AVX512_maskable_3src.
multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS, dag MaskingRHS,
                                  SDPatternOperator Select = vselect_mask,
                                  string MaskingConstraint = "",
                                  bit IsCommutable = 0,
                                  bit IsKCommutable = 0,
                                  bit IsKZCommutable = IsCommutable> :
  AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
                         AttSrcAsm, IntelSrcAsm,
                         [(set _.RC:$dst, RHS)],
                         [(set _.RC:$dst, MaskingRHS)],
                         [(set _.RC:$dst,
                               (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
                         MaskingConstraint, IsCommutable,
                         IsKCommutable, IsKZCommutable>;

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction. In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
// This version uses a separate dag for non-masking and masking.
multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
                                 dag Outs, dag Ins, string OpcodeStr,
                                 string AttSrcAsm, string IntelSrcAsm,
                                 dag RHS, dag MaskRHS,
                                 bit IsCommutable = 0, bit IsKCommutable = 0,
                                 bit IsKZCommutable = IsCommutable> :
  AVX512_maskable_custom<O, F, Outs, Ins,
                         !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                         !con((ins _.KRCWM:$mask), Ins),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm,
                         [(set _.RC:$dst, RHS)],
                         [(set _.RC:$dst,
                               (vselect_mask _.KRCWM:$mask, MaskRHS, _.RC:$src0))],
                         [(set _.RC:$dst,
                               (vselect_mask _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))],
                         "$src0 = $dst", IsCommutable, IsKCommutable,
                         IsKZCommutable>;

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction. In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS,
                           bit IsCommutable = 0, bit IsKCommutable = 0,
                           bit IsKZCommutable = IsCommutable,
                           SDPatternOperator Select = vselect_mask> :
  AVX512_maskable_common<O, F, _, Outs, Ins,
                         !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                         !con((ins _.KRCWM:$mask), Ins),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                         (Select _.KRCWM:$mask, RHS, _.RC:$src0),
                         Select, "$src0 = $dst", IsCommutable, IsKCommutable,
                         IsKZCommutable>;

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the scalar instruction.
multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs, dag Ins, string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS> :
  AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm,
                  RHS, 0, 0, 0, X86selects_mask>;
// Similar to AVX512_maskable but in this case one of the source operands
// ($src1) is already tied to $dst so we just use that for the preserved
// vector elements. NOTE that the NonTiedIns (the ins dag) should exclude
// $src1.
multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
                                dag Outs, dag NonTiedIns, string OpcodeStr,
                                string AttSrcAsm, string IntelSrcAsm,
                                dag RHS,
                                bit IsCommutable = 0,
                                bit IsKCommutable = 0,
                                SDPatternOperator Select = vselect_mask,
                                bit MaskOnly = 0> :
  AVX512_maskable_common<O, F, _, Outs,
                         !con((ins _.RC:$src1), NonTiedIns),
                         !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                         !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm,
                         !if(MaskOnly, (null_frag), RHS),
                         (Select _.KRCWM:$mask, RHS, _.RC:$src1),
                         Select, "", IsCommutable, IsKCommutable>;

// Similar to AVX512_maskable_3src but in this case the input VT for the tied
// operand differs from the output VT. This requires a bitconvert on
// the preserved vector going into the vselect.
// NOTE: The unmasked pattern is disabled.
multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
                                     X86VectorVTInfo InVT,
                                     dag Outs, dag NonTiedIns, string OpcodeStr,
                                     string AttSrcAsm, string IntelSrcAsm,
                                     dag RHS, bit IsCommutable = 0> :
  AVX512_maskable_common<O, F, OutVT, Outs,
                         !con((ins InVT.RC:$src1), NonTiedIns),
                         !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
                         !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm, (null_frag),
                         (vselect_mask InVT.KRCWM:$mask, RHS,
                                       (bitconvert InVT.RC:$src1)),
                         vselect_mask, "", IsCommutable>;

multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                                       dag Outs, dag NonTiedIns, string OpcodeStr,
                                       string AttSrcAsm, string IntelSrcAsm,
                                       dag RHS,
                                       bit IsCommutable = 0,
                                       bit IsKCommutable = 0,
                                       bit MaskOnly = 0> :
  AVX512_maskable_3src<O, F, _, Outs, NonTiedIns, OpcodeStr, AttSrcAsm,
                       IntelSrcAsm, RHS, IsCommutable, IsKCommutable,
                       X86selects_mask, MaskOnly>;

multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs, dag Ins,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern> :
  AVX512_maskable_custom<O, F, Outs, Ins,
                         !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                         !con((ins _.KRCWM:$mask), Ins),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
                         "$src0 = $dst">;

multiclass AVX512_maskable_3src_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
                                       dag Outs, dag NonTiedIns,
                                       string OpcodeStr,
                                       string AttSrcAsm, string IntelSrcAsm,
                                       list<dag> Pattern> :
  AVX512_maskable_custom<O, F, Outs,
                         !con((ins _.RC:$src1), NonTiedIns),
                         !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                         !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
                         "">;
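
// Illustrative example of the tied-operand convention used by the 3src
// multiclasses above (FMA is the typical client): in
//   vfmadd213ps %zmm2, %zmm1, %zmm0 {%k1}
// $src1 is %zmm0, which is tied to $dst, so lanes disabled by %k1 keep the
// original value of %zmm0 instead of coming from a separate $src0 operand.
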
// Instructions with a mask that put the result in a mask register, such as
// "compare" and "vptest".
multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
                                      dag Outs,
                                      dag Ins, dag MaskingIns,
                                      string OpcodeStr,
                                      string AttSrcAsm, string IntelSrcAsm,
                                      list<dag> Pattern,
                                      list<dag> MaskingPattern,
                                      bit IsCommutable = 0> {
  let isCommutable = IsCommutable in {
    def NAME: AVX512<O, F, Outs, Ins,
                     OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
                                    "$dst, "#IntelSrcAsm#"}",
                     Pattern>;

    def NAME#k: AVX512<O, F, Outs, MaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
                                      "$dst {${mask}}, "#IntelSrcAsm#"}",
                       MaskingPattern>, EVEX_K;
  }
}

multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
                                      dag Outs,
                                      dag Ins, dag MaskingIns,
                                      string OpcodeStr,
                                      string AttSrcAsm, string IntelSrcAsm,
                                      dag RHS, dag MaskingRHS,
                                      bit IsCommutable = 0> :
  AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
                             AttSrcAsm, IntelSrcAsm,
                             [(set _.KRC:$dst, RHS)],
                             [(set _.KRC:$dst, MaskingRHS)], IsCommutable>;

multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
                               dag Outs, dag Ins, string OpcodeStr,
                               string AttSrcAsm, string IntelSrcAsm,
                               dag RHS, dag RHS_su, bit IsCommutable = 0> :
  AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
                             !con((ins _.KRCWM:$mask), Ins),
                             OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                             (and _.KRCWM:$mask, RHS_su), IsCommutable>;

// Used by conversion instructions.
multiclass AVX512_maskable_cvt<bits<8> O, Format F, X86VectorVTInfo _,
                               dag Outs,
                               dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                               string OpcodeStr,
                               string AttSrcAsm, string IntelSrcAsm,
                               dag RHS, dag MaskingRHS, dag ZeroMaskingRHS> :
  AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
                         AttSrcAsm, IntelSrcAsm,
                         [(set _.RC:$dst, RHS)],
                         [(set _.RC:$dst, MaskingRHS)],
                         [(set _.RC:$dst, ZeroMaskingRHS)],
                         "$src0 = $dst">;

multiclass AVX512_maskable_fma<bits<8> O, Format F, X86VectorVTInfo _,
                               dag Outs, dag NonTiedIns, string OpcodeStr,
                               string AttSrcAsm, string IntelSrcAsm,
                               dag RHS, dag MaskingRHS, bit IsCommutable,
                               bit IsKCommutable> :
  AVX512_maskable_custom<O, F, Outs,
                         !con((ins _.RC:$src1), NonTiedIns),
                         !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                         !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm,
                         [(set _.RC:$dst, RHS)],
                         [(set _.RC:$dst,
                               (vselect_mask _.KRCWM:$mask, MaskingRHS, _.RC:$src1))],
                         [(set _.RC:$dst,
                               (vselect_mask _.KRCWM:$mask, MaskingRHS, _.ImmAllZerosV))],
                         "", IsCommutable, IsKCommutable>;

// Alias instruction that maps the zero vector to pxor / xorp* for AVX-512.
// This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
// swizzled by ExecutionDomainFix to pxor.
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
                        [(set VR512:$dst, (v16i32 immAllZerosV))]>;
def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
                              [(set VR512:$dst, (v16i32 immAllOnesV))]>;
}

let Predicates = [HasAVX512] in {
def : Pat<(v64i8 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v32i16 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v8i64 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v16f32 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>;
}
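
// For example, materializing a 512-bit zero such as (v16i32 immAllZerosV)
// selects AVX512_512_SET0 above; after expansion this is typically just a
// 128-bit vxorps %xmm0, %xmm0, %xmm0, since writing an xmm register
// implicitly zeroes the rest of the containing zmm register.
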

// Alias instructions that allow VPTERNLOG to be used with a mask to create
// a mix of all-ones and all-zeros elements. This is done this way to force
// the same register to be used as input for all three sources.
let isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteVecALU] in {
def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst),
                                (ins VK16WM:$mask), "",
                                [(set VR512:$dst, (vselect (v16i1 VK16WM:$mask),
                                                           (v16i32 immAllOnesV),
                                                           (v16i32 immAllZerosV)))]>;
def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
                                (ins VK8WM:$mask), "",
                                [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
                                                           (v8i64 immAllOnesV),
                                                           (v8i64 immAllZerosV)))]>;
}
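
// As a sketch of the intended lowering, AVX512_512_SEXT_MASK_32 can be
// implemented with a zero-masked all-ones VPTERNLOG, roughly:
//   vpternlogd $0xff, %zmm0, %zmm0, %zmm0 {%k1} {z}
// which writes all-ones to the lanes enabled by %k1 and zeroes the rest,
// using the same register for all three sources as required above.
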

let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
                        [(set VR128X:$dst, (v4i32 immAllZerosV))]>;
def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
                        [(set VR256X:$dst, (v8i32 immAllZerosV))]>;
}

let Predicates = [HasAVX512] in {
def : Pat<(v8i16 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v16i8 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v2i64 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v4f32 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v2f64 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v32i8 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v16i16 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v4i64 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v8f32 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v4f64 immAllZerosV), (AVX512_256_SET0)>;
}

// Alias instructions that map fld0 to xorps for SSE or vxorps for AVX.
// This is expanded by ExpandPostRAPseudos.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in {
  def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
                          [(set FR32X:$dst, fp32imm0)]>;
  def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
                          [(set FR64X:$dst, fp64imm0)]>;
  def AVX512_FsFLD0F128 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
                            [(set VR128X:$dst, fp128imm0)]>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - VECTOR INSERT
//

// Supports two different pattern operators for masked and unmasked ops. Allows
// null_frag to be passed for one.
multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From,
                                  X86VectorVTInfo To,
                                  SDPatternOperator vinsert_insert,
                                  SDPatternOperator vinsert_for_mask,
                                  X86FoldableSchedWrite sched> {
  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    defm rr : AVX512_maskable_split<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
                   (ins To.RC:$src1, From.RC:$src2, u8imm:$src3),
                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
                   "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
                                         (From.VT From.RC:$src2),
                                         (iPTR imm)),
                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
                                           (From.VT From.RC:$src2),
                                           (iPTR imm))>,
                   AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
    let mayLoad = 1 in
    defm rm : AVX512_maskable_split<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
                   (ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3),
                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
                   "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
                                         (From.VT (From.LdFrag addr:$src2)),
                                         (iPTR imm)),
                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
                                           (From.VT (From.LdFrag addr:$src2)),
                                           (iPTR imm))>, AVX512AIi8Base, EVEX_4V,
                   EVEX_CD8<From.EltSize, From.CD8TupleForm>,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Passes the same pattern operator for masked and unmasked ops.
multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From,
                            X86VectorVTInfo To,
                            SDPatternOperator vinsert_insert,
                            X86FoldableSchedWrite sched> :
  vinsert_for_size_split<Opcode, From, To, vinsert_insert, vinsert_insert, sched>;

multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                                     X86VectorVTInfo To, PatFrag vinsert_insert,
                                     SDNodeXForm INSERT_get_vinsert_imm, list<Predicate> p> {
  let Predicates = p in {
    def : Pat<(vinsert_insert:$ins
                   (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rr")
                      To.RC:$src1, From.RC:$src2,
                      (INSERT_get_vinsert_imm To.RC:$ins)))>;

    def : Pat<(vinsert_insert:$ins
                   (To.VT To.RC:$src1),
                   (From.VT (From.LdFrag addr:$src2)),
                   (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rm")
                      To.RC:$src1, addr:$src2,
                      (INSERT_get_vinsert_imm To.RC:$ins)))>;
  }
}

multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
                            ValueType EltVT64, int Opcode256,
                            X86FoldableSchedWrite sched> {

  let Predicates = [HasVLX] in
    defm NAME # "32x4Z256" : vinsert_for_size<Opcode128,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 vinsert128_insert, sched>, EVEX_V256;

  defm NAME # "32x4Z" : vinsert_for_size<Opcode128,
                            X86VectorVTInfo< 4, EltVT32, VR128X>,
                            X86VectorVTInfo<16, EltVT32, VR512>,
                            vinsert128_insert, sched>, EVEX_V512;

  defm NAME # "64x4Z" : vinsert_for_size<Opcode256,
                            X86VectorVTInfo< 4, EltVT64, VR256X>,
                            X86VectorVTInfo< 8, EltVT64, VR512>,
                            vinsert256_insert, sched>, VEX_W, EVEX_V512;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasVLX, HasDQI] in
    defm NAME # "64x2Z256" : vinsert_for_size_split<Opcode128,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 null_frag, vinsert128_insert, sched>,
                                 VEX_W1X, EVEX_V256;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z" : vinsert_for_size_split<Opcode128,
                              X86VectorVTInfo< 2, EltVT64, VR128X>,
                              X86VectorVTInfo< 8, EltVT64, VR512>,
                              null_frag, vinsert128_insert, sched>,
                              VEX_W, EVEX_V512;

    defm NAME # "32x8Z" : vinsert_for_size_split<Opcode256,
                              X86VectorVTInfo< 8, EltVT32, VR256X>,
                              X86VectorVTInfo<16, EltVT32, VR512>,
                              null_frag, vinsert256_insert, sched>,
                              EVEX_V512;
  }
}

// FIXME: Is there a better scheduler class for VINSERTF/VINSERTI?
defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a, WriteFShuffle256>;
defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a, WriteShuffle256>;
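
// The multiclasses above expand to instructions such as (a representative,
// illustrative sample):
//   vinsertf32x4 $3, %xmm2, %zmm1, %zmm0 {%k1}     # VINSERTF32x4Zrrk
//   vinserti64x4 $1, %ymm2, %zmm1, %zmm0 {%k1} {z} # VINSERTI64x4Zrrkz
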

// Codegen patterns with the alternative types.
// Even with AVX512DQ we'll still use these for unmasked operations.
defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;

defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;

defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;

// Codegen patterns with the alternative types: insert VEC128 into VEC256.
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
// Codegen patterns with the alternative types: insert VEC128 into VEC512.
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
// Codegen patterns with the alternative types: insert VEC256 into VEC512.
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;


multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                 X86VectorVTInfo To, X86VectorVTInfo Cast,
                                 PatFrag vinsert_insert,
                                 SDNodeXForm INSERT_get_vinsert_imm,
                                 list<Predicate> p> {
let Predicates = p in {
  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT From.RC:$src2),
                                                 (iPTR imm))),
                           Cast.RC:$src0)),
            (!cast<Instruction>(InstrStr#"rrk")
             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT
                                                  (bitconvert
                                                   (From.LdFrag addr:$src2))),
                                                 (iPTR imm))),
                           Cast.RC:$src0)),
            (!cast<Instruction>(InstrStr#"rmk")
             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;

  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT From.RC:$src2),
                                                 (iPTR imm))),
                           Cast.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#"rrkz")
             Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT (From.LdFrag addr:$src2)),
                                                 (iPTR imm))),
                           Cast.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#"rmkz")
             Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
}
}

defm : vinsert_for_mask_cast<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
                             v8f32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4f32x_info, v8f32x_info,
                             v4f64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;

defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4i32x_info, v8i32x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v8i16x_info, v16i16x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v16i8x_info, v32i8x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;

defm : vinsert_for_mask_cast<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
                             v16f32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z", v4f32x_info, v16f32_info,
                             v8f64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;

defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v4i32x_info, v16i32_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v8i16x_info, v32i16_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v16i8x_info, v64i8_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;

defm : vinsert_for_mask_cast<"VINSERTF32x8Z", v4f64x_info, v8f64_info,
                             v16f32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
                             v8f64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;

defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v4i64x_info, v8i64_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v16i16x_info, v32i16_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v32i8x_info, v64i8_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;

// vinsertps - insert f32 into XMM
let ExeDomain = SSEPackedSingle in {
let isCommutable = 1 in
def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
      (ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, timm:$src3))]>,
      EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
def VINSERTPSZrm : AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
      (ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst, (X86insertps VR128X:$src1,
                             (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
                             timm:$src3))]>,
      EVEX_4V, EVEX_CD8<32, CD8VT1>,
      Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
}
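
// A worked example of the vinsertps immediate: bits [7:6] select the source
// element, bits [5:4] the destination position, and bits [3:0] zero lanes.
//   vinsertps $0x10, %xmm2, %xmm1, %xmm0
// copies element 0 of %xmm2 into element 1 of the result and takes the
// remaining elements from %xmm1.
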

//===----------------------------------------------------------------------===//
// AVX-512 VECTOR EXTRACT
//---

// Supports two different pattern operators for masked and unmasked ops. Allows
// null_frag to be passed for one.
multiclass vextract_for_size_split<int Opcode,
                                   X86VectorVTInfo From, X86VectorVTInfo To,
                                   SDPatternOperator vextract_extract,
                                   SDPatternOperator vextract_for_mask,
                                   SchedWrite SchedRR, SchedWrite SchedMR> {

  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    defm rr : AVX512_maskable_split<Opcode, MRMDestReg, To, (outs To.RC:$dst),
                  (ins From.RC:$src1, u8imm:$idx),
                  "vextract" # To.EltTypeName # "x" # To.NumElts,
                  "$idx, $src1", "$src1, $idx",
                  (vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm)),
                  (vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm))>,
                  AVX512AIi8Base, EVEX, Sched<[SchedRR]>;

    def mr : AVX512AIi8<Opcode, MRMDestMem, (outs),
                 (ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx),
                 "vextract" # To.EltTypeName # "x" # To.NumElts #
                     "\t{$idx, $src1, $dst|$dst, $src1, $idx}",
                 [(store (To.VT (vextract_extract:$idx
                                 (From.VT From.RC:$src1), (iPTR imm))),
                         addr:$dst)]>, EVEX,
                 Sched<[SchedMR]>;

    let mayStore = 1, hasSideEffects = 0 in
    def mrk : AVX512AIi8<Opcode, MRMDestMem, (outs),
                  (ins To.MemOp:$dst, To.KRCWM:$mask,
                       From.RC:$src1, u8imm:$idx),
                  "vextract" # To.EltTypeName # "x" # To.NumElts #
                      "\t{$idx, $src1, $dst {${mask}}|"
                      "$dst {${mask}}, $src1, $idx}", []>,
                  EVEX_K, EVEX, Sched<[SchedMR]>, NotMemoryFoldable;
  }
}

// Passes the same pattern operator for masked and unmasked ops.
multiclass vextract_for_size<int Opcode, X86VectorVTInfo From,
                             X86VectorVTInfo To,
                             SDPatternOperator vextract_extract,
                             SchedWrite SchedRR, SchedWrite SchedMR> :
  vextract_for_size_split<Opcode, From, To, vextract_extract, vextract_extract, SchedRR, SchedMR>;

// Codegen patterns for the alternative types
multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                                      X86VectorVTInfo To, PatFrag vextract_extract,
                                      SDNodeXForm EXTRACT_get_vextract_imm, list<Predicate> p> {
  let Predicates = p in {
    def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rr")
                      From.RC:$src1,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;
    def : Pat<(store (To.VT (vextract_extract:$ext (From.VT From.RC:$src1),
                             (iPTR imm))), addr:$dst),
              (!cast<Instruction>(InstrStr#"mr") addr:$dst, From.RC:$src1,
               (EXTRACT_get_vextract_imm To.RC:$ext))>;
  }
}

multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
                             ValueType EltVT64, int Opcode256,
                             SchedWrite SchedRR, SchedWrite SchedMR> {
  let Predicates = [HasAVX512] in {
    defm NAME # "32x4Z" : vextract_for_size<Opcode128,
                              X86VectorVTInfo<16, EltVT32, VR512>,
                              X86VectorVTInfo< 4, EltVT32, VR128X>,
                              vextract128_extract, SchedRR, SchedMR>,
                              EVEX_V512, EVEX_CD8<32, CD8VT4>;
    defm NAME # "64x4Z" : vextract_for_size<Opcode256,
                              X86VectorVTInfo< 8, EltVT64, VR512>,
                              X86VectorVTInfo< 4, EltVT64, VR256X>,
                              vextract256_extract, SchedRR, SchedMR>,
                              VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
  }
  let Predicates = [HasVLX] in
    defm NAME # "32x4Z256" : vextract_for_size<Opcode128,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 vextract128_extract, SchedRR, SchedMR>,
                                 EVEX_V256, EVEX_CD8<32, CD8VT4>;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasVLX, HasDQI] in
    defm NAME # "64x2Z256" : vextract_for_size_split<Opcode128,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 null_frag, vextract128_extract, SchedRR, SchedMR>,
                                 VEX_W1X, EVEX_V256, EVEX_CD8<64, CD8VT2>;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z" : vextract_for_size_split<Opcode128,
                              X86VectorVTInfo< 8, EltVT64, VR512>,
                              X86VectorVTInfo< 2, EltVT64, VR128X>,
                              null_frag, vextract128_extract, SchedRR, SchedMR>,
                              VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
    defm NAME # "32x8Z" : vextract_for_size_split<Opcode256,
                              X86VectorVTInfo<16, EltVT32, VR512>,
                              X86VectorVTInfo< 8, EltVT32, VR256X>,
                              null_frag, vextract256_extract, SchedRR, SchedMR>,
                              EVEX_V512, EVEX_CD8<32, CD8VT8>;
  }
}

// TODO - replace WriteFStore/WriteVecStore with X86SchedWriteMoveLSWidths types.
defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b, WriteFShuffle256, WriteFStore>;
defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b, WriteShuffle256, WriteVecStore>;
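
// The multiclasses above expand to instructions such as (illustrative only):
//   vextractf32x4 $1, %zmm1, %xmm0 {%k1}   # VEXTRACTF32x4Zrrk
//   vextracti64x4 $1, %zmm1, (%rdi)        # VEXTRACTI64x4Zmr
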

// extract_subvector codegen patterns with the alternative types.
// Even with AVX512DQ we'll still use these for unmasked operations.
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;

defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;

defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;

// Codegen patterns with the alternative types: extract VEC128 from VEC256.
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;

// Codegen patterns with the alternative types: extract VEC128 from VEC512.
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
// Codegen patterns with the alternative types: extract VEC256 from VEC512.
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;


// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
let Predicates = [NoVLX] in {
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
          (v2i64 (VEXTRACTI128rr
                  (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
          (v2f64 (VEXTRACTF128rr
                  (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
          (v4i32 (VEXTRACTI128rr
                  (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
          (v4f32 (VEXTRACTF128rr
                  (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
          (v8i16 (VEXTRACTI128rr
                  (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
          (v16i8 (VEXTRACTI128rr
                  (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
}

// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
let Predicates = [HasVLX] in {
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
          (v2i64 (VEXTRACTI32x4Z256rr
                  (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
          (v2f64 (VEXTRACTF32x4Z256rr
                  (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
          (v4i32 (VEXTRACTI32x4Z256rr
                  (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
          (v4f32 (VEXTRACTF32x4Z256rr
                  (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
          (v8i16 (VEXTRACTI32x4Z256rr
                  (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
          (v16i8 (VEXTRACTI32x4Z256rr
                  (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
}


// Additional patterns for handling a bitcast between the vselect and the
// extract_subvector.
multiclass vextract_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                  X86VectorVTInfo To, X86VectorVTInfo Cast,
                                  PatFrag vextract_extract,
                                  SDNodeXForm EXTRACT_get_vextract_imm,
                                  list<Predicate> p> {
let Predicates = p in {
  def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask,
                                   (bitconvert
                                    (To.VT (vextract_extract:$ext
                                            (From.VT From.RC:$src), (iPTR imm)))),
                                   To.RC:$src0)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
                      Cast.RC:$src0, Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;

  def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask,
                                   (bitconvert
                                    (To.VT (vextract_extract:$ext
                                            (From.VT From.RC:$src), (iPTR imm)))),
                                   Cast.ImmAllZerosV)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
                      Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;
}
}

defm : vextract_for_mask_cast<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
                              v4f32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z256", v8f32x_info, v4f32x_info,
                              v2f64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;

defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v8i32x_info, v4i32x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v16i16x_info, v8i16x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v32i8x_info, v16i8x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;

defm : vextract_for_mask_cast<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
                              v4f32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z", v16f32_info, v4f32x_info,
                              v2f64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;

defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v16i32_info, v4i32x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v32i16_info, v8i16x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v64i8_info, v16i8x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;

defm : vextract_for_mask_cast<"VEXTRACTF32x8Z", v8f64_info, v4f64x_info,
                              v8f32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
                              v4f64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;

defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v8i64_info, v4i64x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v32i16_info, v16i16x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v64i8_info, v32i8x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;

// vextractps - extract 32 bits from XMM
def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32orGR64:$dst),
      (ins VR128X:$src1, u8imm:$src2),
      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      [(set GR32orGR64:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
      EVEX, VEX_WIG, Sched<[WriteVecExtract]>;

def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
      (ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
              addr:$dst)]>,
      EVEX, VEX_WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecExtractSt]>;
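
// For example, vextractps $2, %xmm1, %eax places element 2 of %xmm1 in
// %eax, and vextractps $2, %xmm1, (%rdi) stores it directly to memory.
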

//===---------------------------------------------------------------------===//
// AVX-512 BROADCAST
//---
// Broadcast with a scalar argument.
multiclass avx512_broadcast_scalar<bits<8> opc, string OpcodeStr,
                                   string Name,
                                   X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo> {
  def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#rr)
             (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
  def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask,
                                       (X86VBroadcast SrcInfo.FRC:$src),
                                       DestInfo.RC:$src0)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#rrk)
             DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
             (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
  def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask,
                                       (X86VBroadcast SrcInfo.FRC:$src),
                                       DestInfo.ImmAllZerosV)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#rrkz)
             DestInfo.KRCWM:$mask, (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
}

// Split version to allow mask and broadcast node to be different types. This
// helps support the 32x2 broadcasts.
multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
                                     string Name,
                                     SchedWrite SchedRR, SchedWrite SchedRM,
                                     X86VectorVTInfo MaskInfo,
                                     X86VectorVTInfo DestInfo,
                                     X86VectorVTInfo SrcInfo,
                                     bit IsConvertibleToThreeAddress,
                                     SDPatternOperator UnmaskedOp = X86VBroadcast,
                                     SDPatternOperator UnmaskedBcastOp = SrcInfo.BroadcastLdFrag> {
  let hasSideEffects = 0 in
  def rr : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst), (ins SrcInfo.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set MaskInfo.RC:$dst,
                      (MaskInfo.VT
                       (bitconvert
                        (DestInfo.VT
                         (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))))],
                    DestInfo.ExeDomain>, T8PD, EVEX, Sched<[SchedRR]>;
  def rrkz : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
                      (ins MaskInfo.KRCWM:$mask, SrcInfo.RC:$src),
                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
                                 "${dst} {${mask}} {z}, $src}"),
                      [(set MaskInfo.RC:$dst,
                        (vselect_mask MaskInfo.KRCWM:$mask,
                         (MaskInfo.VT
                          (bitconvert
                           (DestInfo.VT
                            (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
                         MaskInfo.ImmAllZerosV))],
                      DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ, Sched<[SchedRR]>;
  let Constraints = "$src0 = $dst" in
  def rrk : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
                     (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
                          SrcInfo.RC:$src),
                     !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
                                "${dst} {${mask}}, $src}"),
                     [(set MaskInfo.RC:$dst,
                       (vselect_mask MaskInfo.KRCWM:$mask,
                        (MaskInfo.VT
                         (bitconvert
                          (DestInfo.VT
                           (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
                        MaskInfo.RC:$src0))],
                     DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K, Sched<[SchedRR]>;

  let hasSideEffects = 0, mayLoad = 1 in
  def rm : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
                    (ins SrcInfo.ScalarMemOp:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set MaskInfo.RC:$dst,
                      (MaskInfo.VT
                       (bitconvert
                        (DestInfo.VT
                         (UnmaskedBcastOp addr:$src)))))],
                    DestInfo.ExeDomain>, T8PD, EVEX,
                    EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;

  def rmkz : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
                      (ins MaskInfo.KRCWM:$mask, SrcInfo.ScalarMemOp:$src),
                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
                                 "${dst} {${mask}} {z}, $src}"),
                      [(set MaskInfo.RC:$dst,
                        (vselect_mask MaskInfo.KRCWM:$mask,
                         (MaskInfo.VT
                          (bitconvert
                           (DestInfo.VT
                            (SrcInfo.BroadcastLdFrag addr:$src)))),
                         MaskInfo.ImmAllZerosV))],
                      DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ,
                      EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;

  let Constraints = "$src0 = $dst",
      isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
  def rmk : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
                     (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
                          SrcInfo.ScalarMemOp:$src),
                     !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
                                "${dst} {${mask}}, $src}"),
                     [(set MaskInfo.RC:$dst,
                       (vselect_mask MaskInfo.KRCWM:$mask,
                        (MaskInfo.VT
                         (bitconvert
                          (DestInfo.VT
                           (SrcInfo.BroadcastLdFrag addr:$src)))),
                        MaskInfo.RC:$src0))],
                     DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K,
                     EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
}

// Helper class to force mask and broadcast result to the same type.
multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr, string Name,
                               SchedWrite SchedRR, SchedWrite SchedRM,
                               X86VectorVTInfo DestInfo,
                               X86VectorVTInfo SrcInfo,
                               bit IsConvertibleToThreeAddress> :
  avx512_broadcast_rm_split<opc, OpcodeStr, Name, SchedRR, SchedRM,
                            DestInfo, DestInfo, SrcInfo,
                            IsConvertibleToThreeAddress>;

multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                 WriteFShuffle256Ld, _.info512, _.info128, 1>,
             avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512,
                                     _.info128>,
             EVEX_V512;
  }

  let Predicates = [HasVLX] in {
    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                    WriteFShuffle256Ld, _.info256, _.info128, 1>,
                avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256,
                                        _.info128>,
                EVEX_V256;
  }
}

multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                 WriteFShuffle256Ld, _.info512, _.info128, 1>,
             avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512,
                                     _.info128>,
             EVEX_V512;
  }

  let Predicates = [HasVLX] in {
    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                    WriteFShuffle256Ld, _.info256, _.info128, 1>,
                avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256,
                                        _.info128>,
                EVEX_V256;
    defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                    WriteFShuffle256Ld, _.info128, _.info128, 1>,
                avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info128,
                                        _.info128>,
                EVEX_V128;
  }
}
defm VBROADCASTSS : avx512_fp_broadcast_ss<0x18, "vbroadcastss",
                                           avx512vl_f32_info>;
defm VBROADCASTSD : avx512_fp_broadcast_sd<0x19, "vbroadcastsd",
                                           avx512vl_f64_info>, VEX_W1X;
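
// These defms expand to instructions such as (illustrative):
//   vbroadcastss %xmm1, %zmm0 {%k1}        # VBROADCASTSSZrrk
//   vbroadcastsd (%rdi), %ymm0 {%k1} {z}   # VBROADCASTSDZ256rmkz
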
  // Fold with a mask even if it has multiple uses since it is cheap.
  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
             (!cast<Instruction>(Name#rrk) _.RC:$src0, _.KRCWM:$mask,
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;

  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV),
             (!cast<Instruction>(Name#rrkz) _.KRCWM:$mask,
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
}

multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name,
                                         AVX512VLVectorVTInfo _, SDPatternOperator OpNode,
                                         RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, WriteShuffle256, _.info512,
                                        OpNode, SrcRC, Subreg>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, WriteShuffle256,
                                           _.info256, OpNode, SrcRC, Subreg>, EVEX_V256;
    defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, WriteShuffle,
                                           _.info128, OpNode, SrcRC, Subreg>, EVEX_V128;
  }
}

multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
                                       SDPatternOperator OpNode,
                                       RegisterClass SrcRC, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info512, OpNode,
                                      SrcRC>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info256, OpNode,
                                         SrcRC>, EVEX_V256;
    defm Z128 : avx512_int_broadcast_reg<opc, WriteShuffle, _.info128, OpNode,
                                         SrcRC>, EVEX_V128;
  }
}

defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr",
                       avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>;
defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr",
                       avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit,
                       HasBWI>;
defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
                                                 X86VBroadcast, GR32, HasAVX512>;
defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
                                                 X86VBroadcast, GR64, HasAVX512>, VEX_W;

multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
                                      AVX512VLVectorVTInfo _, Predicate prd,
                                      bit IsConvertibleToThreeAddress> {
  let Predicates = [prd] in {
    defm Z : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256,
                                 WriteShuffle256Ld, _.info512, _.info128,
                                 IsConvertibleToThreeAddress>,
             EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256,
                                    WriteShuffle256Ld, _.info256, _.info128,
                                    IsConvertibleToThreeAddress>,
                EVEX_V256;
    defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle,
                                    WriteShuffleXLd, _.info128, _.info128,
                                    IsConvertibleToThreeAddress>,
                EVEX_V128;
  }
}

defm VPBROADCASTB : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
                                               avx512vl_i8_info, HasBWI, 0>;
defm VPBROADCASTW : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
                                               avx512vl_i16_info, HasBWI, 0>;
defm VPBROADCASTD : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
                                               avx512vl_i32_info, HasAVX512, 1>;
defm VPBROADCASTQ : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
                                               avx512vl_i64_info, HasAVX512, 1>, VEX_W1X;
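// Illustrative only (AT&T syntax): the *r defs above broadcast from a GPR,
// while the VPBROADCAST[BWDQ] defs just defined take an XMM register or a
// single memory element:
//   vpbroadcastd %eax, %zmm0         ; GPR source
//   vpbroadcastd %xmm1, %zmm0        ; XMM source
//   vpbroadcastd (%rdi), %zmm0       ; element load, "rm" form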
multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
                                      SDPatternOperator OpNode,
                                      X86VectorVTInfo _Dst,
                                      X86VectorVTInfo _Src> {
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
                            (_Dst.VT (OpNode addr:$src))>,
                            Sched<[SchedWriteShuffle.YMM.Folded]>,
                            AVX5128IBase, EVEX;
}

// This should be used for the AVX512DQ broadcast instructions. It disables
// the unmasked patterns so that we only use the DQ instructions when masking
// is requested.
multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
                                         SDPatternOperator OpNode,
                                         X86VectorVTInfo _Dst,
                                         X86VectorVTInfo _Src> {
  let hasSideEffects = 0, mayLoad = 1 in
  defm rm : AVX512_maskable_split<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                                  (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
                                  (null_frag),
                                  (_Dst.VT (OpNode addr:$src))>,
                                  Sched<[SchedWriteShuffle.YMM.Folded]>,
                                  AVX5128IBase, EVEX;
}

//===----------------------------------------------------------------------===//
// AVX-512 BROADCAST SUBVECTORS
//

defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
                       X86SubVBroadcastld128, v16i32_info, v4i32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
                       X86SubVBroadcastld128, v16f32_info, v4f32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
                       X86SubVBroadcastld256, v8i64_info, v4i64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT4>;
defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
                       X86SubVBroadcastld256, v8f64_info, v4f64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT4>;

let Predicates = [HasAVX512] in {
def : Pat<(v8f64 (X86SubVBroadcastld256 addr:$src)),
          (VBROADCASTF64X4rm addr:$src)>;
def : Pat<(v16f32 (X86SubVBroadcastld256 addr:$src)),
          (VBROADCASTF64X4rm addr:$src)>;
def : Pat<(v8i64 (X86SubVBroadcastld256 addr:$src)),
          (VBROADCASTI64X4rm addr:$src)>;
def : Pat<(v16i32 (X86SubVBroadcastld256 addr:$src)),
          (VBROADCASTI64X4rm addr:$src)>;
def : Pat<(v32i16 (X86SubVBroadcastld256 addr:$src)),
          (VBROADCASTI64X4rm addr:$src)>;
def : Pat<(v64i8 (X86SubVBroadcastld256 addr:$src)),
          (VBROADCASTI64X4rm addr:$src)>;

def : Pat<(v8f64 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF32X4rm addr:$src)>;
def : Pat<(v16f32 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF32X4rm addr:$src)>;
def : Pat<(v8i64 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4rm addr:$src)>;
def : Pat<(v16i32 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4rm addr:$src)>;
def : Pat<(v32i16 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4rm addr:$src)>;
def : Pat<(v64i8 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4rm addr:$src)>;
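// Note (illustrative): the masked patterns that follow exist because a
// 128-bit subvector broadcast can be produced at one element type and masked
// at another, e.g. a v8f64-typed X86SubVBroadcastld128 reinterpreted as
// v16f32 before a 16-lane vselect_mask; the bitconvert in those patterns
// absorbs the reinterpretation so the 32x4 forms can still be selected.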
// Patterns for selects of bitcasted operations.
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))),
                        (v16f32 immAllZerosV)),
          (VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))),
                        VR512:$src0),
          (VBROADCASTF32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))),
                        (v16i32 immAllZerosV)),
          (VBROADCASTI32X4rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))),
                        VR512:$src0),
          (VBROADCASTI32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;

def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))),
                        (v8f64 immAllZerosV)),
          (VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))),
                        VR512:$src0),
          (VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))),
                        (v8i64 immAllZerosV)),
          (VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))),
                        VR512:$src0),
          (VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}

let Predicates = [HasVLX] in {
defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
                           X86SubVBroadcastld128, v8i32x_info, v4i32x_info>,
                           EVEX_V256, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
                           X86SubVBroadcastld128, v8f32x_info, v4f32x_info>,
                           EVEX_V256, EVEX_CD8<32, CD8VT4>;

def : Pat<(v4f64 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF32X4Z256rm addr:$src)>;
def : Pat<(v8f32 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF32X4Z256rm addr:$src)>;
def : Pat<(v4i64 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4Z256rm addr:$src)>;
def : Pat<(v8i32 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4Z256rm addr:$src)>;
def : Pat<(v16i16 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4Z256rm addr:$src)>;
def : Pat<(v32i8 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4Z256rm addr:$src)>;
// Patterns for selects of bitcasted operations.
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8f32 (v4f64 (X86SubVBroadcastld128 addr:$src))),
                        (v8f32 immAllZerosV)),
          (VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8f32 (v4f64 (X86SubVBroadcastld128 addr:$src))),
                        VR256X:$src0),
          (VBROADCASTF32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8i32 (v4i64 (X86SubVBroadcastld128 addr:$src))),
                        (v8i32 immAllZerosV)),
          (VBROADCASTI32X4Z256rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8i32 (v4i64 (X86SubVBroadcastld128 addr:$src))),
                        VR256X:$src0),
          (VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
}

let Predicates = [HasVLX, HasDQI] in {
defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
                           X86SubVBroadcastld128, v4i64x_info, v2i64x_info>, VEX_W1X,
                           EVEX_V256, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
                           X86SubVBroadcastld128, v4f64x_info, v2f64x_info>, VEX_W1X,
                           EVEX_V256, EVEX_CD8<64, CD8VT2>;

// Patterns for selects of bitcasted operations.
def : Pat<(vselect_mask VK4WM:$mask,
                        (bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))),
                        (v4f64 immAllZerosV)),
          (VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK4WM:$mask,
                        (bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))),
                        VR256X:$src0),
          (VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK4WM:$mask,
                        (bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))),
                        (v4i64 immAllZerosV)),
          (VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK4WM:$mask,
                        (bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))),
                        VR256X:$src0),
          (VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
}

let Predicates = [HasDQI] in {
defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
                       X86SubVBroadcastld128, v8i64_info, v2i64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
                       X86SubVBroadcastld256, v16i32_info, v8i32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT8>;
defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
                       X86SubVBroadcastld128, v8f64_info, v2f64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
                       X86SubVBroadcastld256, v16f32_info, v8f32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT8>;
// Patterns for selects of bitcasted operations.
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))),
                        (v16f32 immAllZerosV)),
          (VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))),
                        VR512:$src0),
          (VBROADCASTF32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))),
                        (v16i32 immAllZerosV)),
          (VBROADCASTI32X8rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))),
                        VR512:$src0),
          (VBROADCASTI32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;

def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))),
                        (v8f64 immAllZerosV)),
          (VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))),
                        VR512:$src0),
          (VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))),
                        (v8i64 immAllZerosV)),
          (VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))),
                        VR512:$src0),
          (VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}

multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
                                        AVX512VLVectorVTInfo _Dst,
                                        AVX512VLVectorVTInfo _Src> {
  let Predicates = [HasDQI] in
    defm Z : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256,
                                       WriteShuffle256Ld, _Dst.info512,
                                       _Src.info512, _Src.info128, 0, null_frag, null_frag>,
                                       EVEX_V512;
  let Predicates = [HasDQI, HasVLX] in
    defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256,
                                          WriteShuffle256Ld, _Dst.info256,
                                          _Src.info256, _Src.info128, 0, null_frag, null_frag>,
                                          EVEX_V256;
}

multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
                                         AVX512VLVectorVTInfo _Dst,
                                         AVX512VLVectorVTInfo _Src> :
  avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> {

  let Predicates = [HasDQI, HasVLX] in
    defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle,
                                          WriteShuffleXLd, _Dst.info128,
                                          _Src.info128, _Src.info128, 0, null_frag, null_frag>,
                                          EVEX_V128;
}

defm VBROADCASTI32X2 : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
                                                     avx512vl_i32_info, avx512vl_i64_info>;
defm VBROADCASTF32X2 : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
                                                    avx512vl_f32_info, avx512vl_f64_info>;

//===----------------------------------------------------------------------===//
// AVX-512 BROADCAST MASK TO VECTOR REGISTER
//---
multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr,
                                  X86VectorVTInfo _, RegisterClass KRC> {
  def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.RC:$dst), (ins KRC:$src),
                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                      [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))]>,
                      EVEX, Sched<[WriteShuffle]>;
}
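// Illustrative only (AT&T syntax): vpbroadcastmw2d copies the mask register
// value, zero-extended to a dword, into every element of the destination:
//   vpbroadcastmw2d %k1, %zmm0     ; each of the 16 dwords = zext(k1)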
multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
                                 AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> {
  let Predicates = [HasCDI] in
    defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>, EVEX_V512;
  let Predicates = [HasCDI, HasVLX] in {
    defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>, EVEX_V256;
    defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>, EVEX_V128;
  }
}

defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
                                             avx512vl_i32_info, VK16>;
defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
                                             avx512vl_i64_info, VK8>, VEX_W;

//===----------------------------------------------------------------------===//
// -- VPERMI2 - 3 source operands form --
multiclass avx512_perm_i<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched,
                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
    hasSideEffects = 0 in {
  defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst),
           (ins _.RC:$src2, _.RC:$src3),
           OpcodeStr, "$src3, $src2", "$src2, $src3",
           (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, _.RC:$src3)), 1>,
           EVEX_4V, AVX5128IBase, Sched<[sched]>;

  let mayLoad = 1 in
  defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
           (ins _.RC:$src2, _.MemOp:$src3),
           OpcodeStr, "$src3, $src2", "$src2, $src3",
           (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1,
                  (_.VT (_.LdFrag addr:$src3)))), 1>,
           EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched,
                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
      hasSideEffects = 0, mayLoad = 1 in
  defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
            (ins _.RC:$src2, _.ScalarMemOp:$src3),
            OpcodeStr, !strconcat("${src3}", _.BroadcastStr, ", $src2"),
            !strconcat("$src2, ${src3}", _.BroadcastStr),
            (_.VT (X86VPermt2 _.RC:$src2,
                   IdxVT.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
            AVX5128IBase, EVEX_4V, EVEX_B,
            Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr,
                               X86FoldableSchedWrite sched,
                               AVX512VLVectorVTInfo VTInfo,
                               AVX512VLVectorVTInfo ShuffleMask> {
  defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
                           ShuffleMask.info512>,
             avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info512,
                              ShuffleMask.info512>, EVEX_V512;
  let Predicates = [HasVLX] in {
    defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
                                 ShuffleMask.info128>,
                   avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info128,
                                    ShuffleMask.info128>, EVEX_V128;
    defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
                                 ShuffleMask.info256>,
                   avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info256,
                                    ShuffleMask.info256>, EVEX_V256;
  }
}

multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched,
                                  AVX512VLVectorVTInfo VTInfo,
                                  AVX512VLVectorVTInfo Idx,
                                  Predicate Prd> {
  let Predicates = [Prd] in
    defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
                             Idx.info512>, EVEX_V512;
  let Predicates = [Prd, HasVLX] in {
    defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
                                 Idx.info128>, EVEX_V128;
    defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
                                 Idx.info256>, EVEX_V256;
  }
}

defm VPERMI2D  : avx512_perm_i_sizes<0x76, "vpermi2d", WriteVarShuffle256,
                  avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMI2Q  : avx512_perm_i_sizes<0x76, "vpermi2q", WriteVarShuffle256,
                  avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMI2W  : avx512_perm_i_sizes_bw<0x75, "vpermi2w", WriteVarShuffle256,
                  avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
                  VEX_W, EVEX_CD8<16, CD8VF>;
defm VPERMI2B  : avx512_perm_i_sizes_bw<0x75, "vpermi2b", WriteVarShuffle256,
                  avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
                  EVEX_CD8<8, CD8VF>;
defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", WriteFVarShuffle256,
                  avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", WriteFVarShuffle256,
                  avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;

// Extra patterns to deal with extra bitcasts due to passthru and index being
// different types on the fp versions.
multiclass avx512_perm_i_lowering<string InstrStr, X86VectorVTInfo _,
                                  X86VectorVTInfo IdxVT,
                                  X86VectorVTInfo CastVT> {
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                                (X86VPermt2 (_.VT _.RC:$src2),
                                            (IdxVT.VT (bitconvert
                                                       (CastVT.VT _.RC:$src1))),
                                            _.RC:$src3),
                                (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
            (!cast<Instruction>(InstrStr#"rrk") _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, _.RC:$src3)>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                                (X86VPermt2 _.RC:$src2,
                                            (IdxVT.VT (bitconvert
                                                       (CastVT.VT _.RC:$src1))),
                                            (_.LdFrag addr:$src3)),
                                (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
            (!cast<Instruction>(InstrStr#"rmk") _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3)>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                                (X86VPermt2 _.RC:$src2,
                                            (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))),
                                            (_.BroadcastLdFrag addr:$src3)),
                                (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
            (!cast<Instruction>(InstrStr#"rmbk") _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3)>;
}
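// Illustrative only: for the v16f32 instantiation below, the patterns above
// match a masked X86VPermt2 whose index/passthru operand is only visible
// through a bitconvert (e.g. a v8i64 value reused as the v16i32 index), and
// still select the tied rrk/rmk/rmbk forms of VPERMI2PS.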
// TODO: Should we add more casts? The vXi64 case is common due to ABI.
defm : avx512_perm_i_lowering<"VPERMI2PS", v16f32_info, v16i32_info, v8i64_info>;
defm : avx512_perm_i_lowering<"VPERMI2PS256", v8f32x_info, v8i32x_info, v4i64x_info>;
defm : avx512_perm_i_lowering<"VPERMI2PS128", v4f32x_info, v4i32x_info, v2i64x_info>;

// VPERMT2
multiclass avx512_perm_t<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched,
                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
  defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
           (ins IdxVT.RC:$src2, _.RC:$src3),
           OpcodeStr, "$src3, $src2", "$src2, $src3",
           (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)), 1>,
           EVEX_4V, AVX5128IBase, Sched<[sched]>;

  defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
           (ins IdxVT.RC:$src2, _.MemOp:$src3),
           OpcodeStr, "$src3, $src2", "$src2, $src3",
           (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
                  (_.LdFrag addr:$src3))), 1>,
           EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched,
                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
  defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
            (ins IdxVT.RC:$src2, _.ScalarMemOp:$src3),
            OpcodeStr, !strconcat("${src3}", _.BroadcastStr, ", $src2"),
            !strconcat("$src2, ${src3}", _.BroadcastStr),
            (_.VT (X86VPermt2 _.RC:$src1,
                   IdxVT.RC:$src2, (_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
            AVX5128IBase, EVEX_4V, EVEX_B,
            Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr,
                               X86FoldableSchedWrite sched,
                               AVX512VLVectorVTInfo VTInfo,
                               AVX512VLVectorVTInfo ShuffleMask> {
  defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
                           ShuffleMask.info512>,
             avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info512,
                              ShuffleMask.info512>, EVEX_V512;
  let Predicates = [HasVLX] in {
    defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
                                 ShuffleMask.info128>,
                   avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info128,
                                    ShuffleMask.info128>, EVEX_V128;
    defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
                                 ShuffleMask.info256>,
                   avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info256,
                                    ShuffleMask.info256>, EVEX_V256;
  }
}

multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched,
                                  AVX512VLVectorVTInfo VTInfo,
                                  AVX512VLVectorVTInfo Idx, Predicate Prd> {
  let Predicates = [Prd] in
    defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
                             Idx.info512>, EVEX_V512;
  let Predicates = [Prd, HasVLX] in {
    defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
                                 Idx.info128>, EVEX_V128;
    defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
                                 Idx.info256>, EVEX_V256;
  }
}

defm VPERMT2D  : avx512_perm_t_sizes<0x7E, "vpermt2d", WriteVarShuffle256,
                  avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMT2Q  : avx512_perm_t_sizes<0x7E, "vpermt2q", WriteVarShuffle256,
                  avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMT2W  : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", WriteVarShuffle256,
                  avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
                  VEX_W, EVEX_CD8<16, CD8VF>;
defm VPERMT2B  : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", WriteVarShuffle256,
                  avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
                  EVEX_CD8<8, CD8VF>;
defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", WriteFVarShuffle256,
                  avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", WriteFVarShuffle256,
                  avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;

//===----------------------------------------------------------------------===//
// AVX-512 - BLEND using mask
//

multiclass WriteFVarBlendask<bits<8> opc, string OpcodeStr,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
           (ins _.RC:$src1, _.RC:$src2),
           !strconcat(OpcodeStr,
                      "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), []>,
           EVEX_4V, Sched<[sched]>;
  def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
            (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
            !strconcat(OpcodeStr,
                       "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
            []>, EVEX_4V, EVEX_K, Sched<[sched]>;
  def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
             (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
             !strconcat(OpcodeStr,
                        "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
             []>, EVEX_4V, EVEX_KZ, Sched<[sched]>, NotMemoryFoldable;
  let mayLoad = 1 in {
  def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
           (ins _.RC:$src1, _.MemOp:$src2),
           !strconcat(OpcodeStr,
                      "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
           []>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
           Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
            (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
            !strconcat(OpcodeStr,
                       "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
            []>, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>,
            Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
             (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
             !strconcat(OpcodeStr,
                        "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
             []>, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
  }
  }
}
multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in {
  def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
             (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
             !strconcat(OpcodeStr,
                        "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
                        "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
             EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[sched.Folded, sched.ReadAfterFold]>;

  def rmbkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
              (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
              !strconcat(OpcodeStr,
                         "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}} {z}|",
                         "$dst {${mask}} {z}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
              EVEX_4V, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
              Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
  def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
            (ins _.RC:$src1, _.ScalarMemOp:$src2),
            !strconcat(OpcodeStr,
                       "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
                       "$dst, $src1, ${src2}", _.BroadcastStr, "}"), []>,
            EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
            Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass blendmask_dq<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
                        AVX512VLVectorVTInfo VTInfo> {
  defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
           WriteFVarBlendask_rmb<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
           EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
                WriteFVarBlendask_rmb<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
                WriteFVarBlendask_rmb<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
                EVEX_V128;
  }
}

multiclass blendmask_bw<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
                        AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasBWI] in
    defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
             EVEX_V512;

  let Predicates = [HasBWI, HasVLX] in {
    defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
                EVEX_V128;
  }
}

defm VBLENDMPS : blendmask_dq<0x65, "vblendmps", SchedWriteFVarBlend,
                              avx512vl_f32_info>;
defm VBLENDMPD : blendmask_dq<0x65, "vblendmpd", SchedWriteFVarBlend,
                              avx512vl_f64_info>, VEX_W;
defm VPBLENDMD : blendmask_dq<0x64, "vpblendmd", SchedWriteVarBlend,
                              avx512vl_i32_info>;
defm VPBLENDMQ : blendmask_dq<0x64, "vpblendmq", SchedWriteVarBlend,
                              avx512vl_i64_info>, VEX_W;
defm VPBLENDMB : blendmask_bw<0x66, "vpblendmb", SchedWriteVarBlend,
                              avx512vl_i8_info>;
defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", SchedWriteVarBlend,
                              avx512vl_i16_info>, VEX_W;
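// Illustrative only (AT&T syntax): the blend-with-mask instructions take an
// element from the second source where the mask bit is set and from the
// first source otherwise:
//   vpblendmd %zmm2, %zmm1, %zmm0 {%k1}   ; zmm0[i] = k1[i] ? zmm2[i] : zmm1[i]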
//===----------------------------------------------------------------------===//
// Compare Instructions
//===----------------------------------------------------------------------===//

// avx512_cmp_scalar - AVX512 CMPSS and CMPSD

multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
                             PatFrag OpNode_su, PatFrag OpNodeSAE_su,
                             X86FoldableSchedWrite sched> {
  defm rr_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                (outs _.KRC:$dst),
                (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                "vcmp"#_.Suffix,
                "$cc, $src2, $src1", "$src1, $src2, $cc",
                (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
                (OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                           timm:$cc)>, EVEX_4V, VEX_LIG, Sched<[sched]>, SIMD_EXC;
  let mayLoad = 1 in
  defm rm_Int : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
                (outs _.KRC:$dst),
                (ins _.RC:$src1, _.IntScalarMemOp:$src2, u8imm:$cc),
                "vcmp"#_.Suffix,
                "$cc, $src2, $src1", "$src1, $src2, $cc",
                (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
                        timm:$cc),
                (OpNode_su (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
                           timm:$cc)>, EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
                Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;

  let Uses = [MXCSR] in
  defm rrb_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                 (outs _.KRC:$dst),
                 (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                 "vcmp"#_.Suffix,
                 "$cc, {sae}, $src2, $src1", "$src1, $src2, {sae}, $cc",
                 (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                            timm:$cc),
                 (OpNodeSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                               timm:$cc)>,
                 EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;

  let isCodeGenOnly = 1 in {
    let isCommutable = 1 in
    def rr : AVX512Ii8<0xC2, MRMSrcReg,
                       (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, u8imm:$cc),
                       !strconcat("vcmp", _.Suffix,
                                  "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
                       [(set _.KRC:$dst, (OpNode _.FRC:$src1,
                                                 _.FRC:$src2,
                                                 timm:$cc))]>,
                       EVEX_4V, VEX_LIG, Sched<[sched]>, SIMD_EXC;
    def rm : AVX512Ii8<0xC2, MRMSrcMem,
                       (outs _.KRC:$dst),
                       (ins _.FRC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
                       !strconcat("vcmp", _.Suffix,
                                  "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
                       [(set _.KRC:$dst, (OpNode _.FRC:$src1,
                                                 (_.ScalarLdFrag addr:$src2),
                                                 timm:$cc))]>,
                       EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
                       Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  }
}

def X86cmpms_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                          (X86cmpms node:$src1, node:$src2, node:$cc), [{
  return N->hasOneUse();
}]>;
def X86cmpmsSAE_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                             (X86cmpmsSAE node:$src1, node:$src2, node:$cc), [{
  return N->hasOneUse();
}]>;

let Predicates = [HasAVX512] in {
  let ExeDomain = SSEPackedSingle in
  defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsSAE,
                                   X86cmpms_su, X86cmpmsSAE_su,
                                   SchedWriteFCmp.Scl>, AVX512XSIi8Base;
  let ExeDomain = SSEPackedDouble in
  defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsSAE,
                                   X86cmpms_su, X86cmpmsSAE_su,
                                   SchedWriteFCmp.Scl>, AVX512XDIi8Base, VEX_W;
}

multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr,
                              X86FoldableSchedWrite sched,
                              X86VectorVTInfo _, bit IsCommutable> {
  let isCommutable = IsCommutable, hasSideEffects = 0 in
  def rr : AVX512BI<opc, MRMSrcReg,
                    (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
                    !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    []>, EVEX_4V, Sched<[sched]>;
  let mayLoad = 1, hasSideEffects = 0 in
  def rm : AVX512BI<opc, MRMSrcMem,
                    (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
                    !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    []>, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  let isCommutable = IsCommutable, hasSideEffects = 0 in
  def rrk : AVX512BI<opc, MRMSrcReg,
                     (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
                     !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
                                "$dst {${mask}}, $src1, $src2}"),
                     []>, EVEX_4V, EVEX_K, Sched<[sched]>;
  let mayLoad = 1, hasSideEffects = 0 in
  def rmk : AVX512BI<opc, MRMSrcMem,
                     (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
                     !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
                                "$dst {${mask}}, $src1, $src2}"),
                     []>, EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                  bit IsCommutable> :
           avx512_icmp_packed<opc, OpcodeStr, sched, _, IsCommutable> {
  let mayLoad = 1, hasSideEffects = 0 in {
  def rmb : AVX512BI<opc, MRMSrcMem,
                     (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
                     !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
                                "|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
                     []>, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmbk : AVX512BI<opc, MRMSrcMem,
                      (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
                                              _.ScalarMemOp:$src2),
                      !strconcat(OpcodeStr,
                                 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
                                 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
                      []>, EVEX_4V, EVEX_K, EVEX_B,
                      Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr,
                                 X86SchedWriteWidths sched,
                                 AVX512VLVectorVTInfo VTInfo, Predicate prd,
                                 bit IsCommutable = 0> {
  let Predicates = [prd] in
    defm Z : avx512_icmp_packed<opc, OpcodeStr, sched.ZMM,
                                VTInfo.info512, IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_packed<opc, OpcodeStr, sched.YMM,
                                   VTInfo.info256, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_icmp_packed<opc, OpcodeStr, sched.XMM,
                                   VTInfo.info128, IsCommutable>, EVEX_V128;
  }
}

multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
                                     X86SchedWriteWidths sched,
                                     AVX512VLVectorVTInfo VTInfo,
                                     Predicate prd, bit IsCommutable = 0> {
  let Predicates = [prd] in
    defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.ZMM,
                                    VTInfo.info512, IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.YMM,
                                       VTInfo.info256, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.XMM,
                                       VTInfo.info128, IsCommutable>, EVEX_V128;
  }
}

// This fragment treats X86cmpm as commutable to help match loads in both
// operands for PCMPEQ.
def X86setcc_commute : SDNode<"ISD::SETCC", SDTSetCC, [SDNPCommutative]>;
def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2),
                         (setcc node:$src1, node:$src2, SETGT)>;
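// Illustrative only: because the SETCC node above is marked commutative, a
// one-use
//   (setcc (load addr), v, SETEQ)
// can be matched with its operands swapped, letting PCMPEQ fold a load that
// appears as the first operand.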
// AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
// increase the pattern complexity the way an immediate would.
let AddedComplexity = 2 in {
// FIXME: Is there a better scheduler class for VPCMP?
defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb",
                SchedWriteVecALU, avx512vl_i8_info, HasBWI, 1>,
                EVEX_CD8<8, CD8VF>, VEX_WIG;

defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw",
                SchedWriteVecALU, avx512vl_i16_info, HasBWI, 1>,
                EVEX_CD8<16, CD8VF>, VEX_WIG;

defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd",
                SchedWriteVecALU, avx512vl_i32_info, HasAVX512, 1>,
                EVEX_CD8<32, CD8VF>;

defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq",
                SchedWriteVecALU, avx512vl_i64_info, HasAVX512, 1>,
                T8PD, VEX_W, EVEX_CD8<64, CD8VF>;

defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb",
                SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
                EVEX_CD8<8, CD8VF>, VEX_WIG;

defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw",
                SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
                EVEX_CD8<16, CD8VF>, VEX_WIG;

defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd",
                SchedWriteVecALU, avx512vl_i32_info, HasAVX512>,
                EVEX_CD8<32, CD8VF>;

defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq",
                SchedWriteVecALU, avx512vl_i64_info, HasAVX512>,
                T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
}

multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
                          PatFrag Frag_su, PatFrag CommFrag, PatFrag CommFrag_su,
                          X86FoldableSchedWrite sched,
                          X86VectorVTInfo _, string Name> {
  let isCommutable = 1 in
  def rri : AVX512AIi8<opc, MRMSrcReg,
            (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
            !strconcat("vpcmp", Suffix,
                       "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
            [(set _.KRC:$dst, (_.KVT (Frag:$cc (_.VT _.RC:$src1),
                                               (_.VT _.RC:$src2),
                                               cond)))]>,
            EVEX_4V, Sched<[sched]>;
  def rmi : AVX512AIi8<opc, MRMSrcMem,
            (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
            !strconcat("vpcmp", Suffix,
                       "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
            [(set _.KRC:$dst, (_.KVT
                               (Frag:$cc
                                (_.VT _.RC:$src1),
                                (_.VT (_.LdFrag addr:$src2)),
                                cond)))]>,
            EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  let isCommutable = 1 in
  def rrik : AVX512AIi8<opc, MRMSrcReg,
             (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
                                     u8imm:$cc),
             !strconcat("vpcmp", Suffix,
                        "\t{$cc, $src2, $src1, $dst {${mask}}|",
                        "$dst {${mask}}, $src1, $src2, $cc}"),
             [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                    (_.KVT (Frag_su:$cc (_.VT _.RC:$src1),
                                                        (_.VT _.RC:$src2),
                                                        cond))))]>,
             EVEX_4V, EVEX_K, Sched<[sched]>;
  def rmik : AVX512AIi8<opc, MRMSrcMem,
             (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
                                     u8imm:$cc),
             !strconcat("vpcmp", Suffix,
                        "\t{$cc, $src2, $src1, $dst {${mask}}|",
                        "$dst {${mask}}, $src1, $src2, $cc}"),
             [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                    (_.KVT
                                     (Frag_su:$cc
                                      (_.VT _.RC:$src1),
                                      (_.VT (_.LdFrag addr:$src2)),
                                      cond))))]>,
             EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
  def : Pat<(_.KVT (CommFrag:$cc (_.LdFrag addr:$src2),
                                 (_.VT _.RC:$src1), cond)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmi")
             _.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>;

  def : Pat<(and _.KRCWM:$mask,
                 (_.KVT (CommFrag_su:$cc (_.LdFrag addr:$src2),
                                         (_.VT _.RC:$src1), cond))),
            (!cast<Instruction>(Name#_.ZSuffix#"rmik")
             _.KRCWM:$mask, _.RC:$src1, addr:$src2,
             (CommFrag.OperandTransform $cc))>;
}

multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
                              PatFrag Frag_su, PatFrag CommFrag,
                              PatFrag CommFrag_su, X86FoldableSchedWrite sched,
                              X86VectorVTInfo _, string Name> :
           avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
                          sched, _, Name> {
  def rmib : AVX512AIi8<opc, MRMSrcMem,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
                                     u8imm:$cc),
             !strconcat("vpcmp", Suffix,
                        "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
                        "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
             [(set _.KRC:$dst, (_.KVT (Frag:$cc
                                       (_.VT _.RC:$src1),
                                       (_.BroadcastLdFrag addr:$src2),
                                       cond)))]>,
             EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmibk : AVX512AIi8<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
                                      _.ScalarMemOp:$src2, u8imm:$cc),
              !strconcat("vpcmp", Suffix,
                         "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
                         "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
              [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                     (_.KVT (Frag_su:$cc
                                             (_.VT _.RC:$src1),
                                             (_.BroadcastLdFrag addr:$src2),
                                             cond))))]>,
              EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;

  def : Pat<(_.KVT (CommFrag:$cc (_.BroadcastLdFrag addr:$src2),
                                 (_.VT _.RC:$src1), cond)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmib")
             _.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>;

  def : Pat<(and _.KRCWM:$mask,
                 (_.KVT (CommFrag_su:$cc (_.BroadcastLdFrag addr:$src2),
                                         (_.VT _.RC:$src1), cond))),
            (!cast<Instruction>(Name#_.ZSuffix#"rmibk")
             _.KRCWM:$mask, _.RC:$src1, addr:$src2,
             (CommFrag_su.OperandTransform $cc))>;
}

multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, PatFrag Frag,
                             PatFrag Frag_su, PatFrag CommFrag,
                             PatFrag CommFrag_su, X86SchedWriteWidths sched,
                             AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
                            sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
                               sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
                               sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
  }
}

multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, PatFrag Frag,
                                 PatFrag Frag_su, PatFrag CommFrag,
                                 PatFrag CommFrag_su, X86SchedWriteWidths sched,
                                 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
                                sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
                                   sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
                                   sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
  }
}

def X86pcmpm_imm : SDNodeXForm<setcc, [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  uint8_t SSECC = X86::getVPCMPImmForCond(CC);
  return getI8Imm(SSECC, SDLoc(N));
}]>;
// Swapped operand version of the above.
def X86pcmpm_imm_commute : SDNodeXForm<setcc, [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  uint8_t SSECC = X86::getVPCMPImmForCond(CC);
  SSECC = X86::getSwappedVPCMPImm(SSECC);
  return getI8Imm(SSECC, SDLoc(N));
}]>;

def X86pcmpm : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                       (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return !ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm>;

def X86pcmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                          (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return N->hasOneUse() && !ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm>;

// Same as above, but commutes immediate. Use for load folding.
def X86pcmpm_commute : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                               (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return !ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm_commute>;

def X86pcmpm_commute_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                                  (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return N->hasOneUse() && !ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm_commute>;

def X86pcmpum : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                        (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm>;

def X86pcmpum_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                           (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return N->hasOneUse() && ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm>;

// Same as above, but commutes immediate. Use for load folding.
def X86pcmpum_commute : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                                (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm_commute>;

def X86pcmpum_commute_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                                   (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return N->hasOneUse() && ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm_commute>;
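// Illustrative only: a one-use unsigned compare such as
//   (setcc x, (load addr), SETULT)
// is matched by X86pcmpum, with X86pcmpm_imm encoding the condition as the
// VPCMPU immediate; the *_commute variants accept the load as the first
// operand and emit the swapped immediate instead.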
// FIXME: Is there a better scheduler class for VPCMP/VPCMPU?
defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86pcmpm, X86pcmpm_su,
                                X86pcmpm_commute, X86pcmpm_commute_su,
                                SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
                                EVEX_CD8<8, CD8VF>;
defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86pcmpum, X86pcmpum_su,
                                 X86pcmpum_commute, X86pcmpum_commute_su,
                                 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
                                 EVEX_CD8<8, CD8VF>;

defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86pcmpm, X86pcmpm_su,
                                X86pcmpm_commute, X86pcmpm_commute_su,
                                SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
                                VEX_W, EVEX_CD8<16, CD8VF>;
defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86pcmpum, X86pcmpum_su,
                                 X86pcmpum_commute, X86pcmpum_commute_su,
                                 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
                                 VEX_W, EVEX_CD8<16, CD8VF>;

defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86pcmpm, X86pcmpm_su,
                                    X86pcmpm_commute, X86pcmpm_commute_su,
                                    SchedWriteVecALU, avx512vl_i32_info,
                                    HasAVX512>, EVEX_CD8<32, CD8VF>;
defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86pcmpum, X86pcmpum_su,
                                     X86pcmpum_commute, X86pcmpum_commute_su,
                                     SchedWriteVecALU, avx512vl_i32_info,
                                     HasAVX512>, EVEX_CD8<32, CD8VF>;

defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86pcmpm, X86pcmpm_su,
                                    X86pcmpm_commute, X86pcmpm_commute_su,
                                    SchedWriteVecALU, avx512vl_i64_info,
                                    HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86pcmpum, X86pcmpum_su,
                                     X86pcmpum_commute, X86pcmpum_commute_su,
                                     SchedWriteVecALU, avx512vl_i64_info,
                                     HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;

def X86cmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                         (X86cmpm node:$src1, node:$src2, node:$cc), [{
  return N->hasOneUse();
}]>;

def X86cmpm_imm_commute : SDNodeXForm<timm, [{
  uint8_t Imm = X86::getSwappedVCMPImm(N->getZExtValue() & 0x1f);
  return getI8Imm(Imm, SDLoc(N));
}]>;

multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
                              string Name> {
let Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
             "vcmp"#_.Suffix,
             "$cc, $src2, $src1", "$src1, $src2, $cc",
             (X86any_cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
             (X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
             1>, Sched<[sched]>;

  defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
             "vcmp"#_.Suffix,
             "$cc, $src2, $src1", "$src1, $src2, $cc",
             (X86any_cmpm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
                          timm:$cc),
             (X86cmpm_su (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
                         timm:$cc)>,
             Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
              (outs _.KRC:$dst),
              (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
              "vcmp"#_.Suffix,
              "$cc, ${src2}"#_.BroadcastStr#", $src1",
              "$src1, ${src2}"#_.BroadcastStr#", $cc",
              (X86any_cmpm (_.VT _.RC:$src1),
                           (_.VT (_.BroadcastLdFrag addr:$src2)),
                           timm:$cc),
              (X86cmpm_su (_.VT _.RC:$src1),
                          (_.VT (_.BroadcastLdFrag addr:$src2)),
                          timm:$cc)>,
              EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
  // Patterns for selecting with loads in the other operand.
  def : Pat<(X86any_cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1),
                         timm:$cc),
            (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
             (X86cmpm_imm_commute timm:$cc))>;

  def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.LdFrag addr:$src2),
                                            (_.VT _.RC:$src1),
                                            timm:$cc)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
             _.RC:$src1, addr:$src2,
             (X86cmpm_imm_commute timm:$cc))>;

  def : Pat<(X86any_cmpm (_.BroadcastLdFrag addr:$src2),
                         (_.VT _.RC:$src1), timm:$cc),
            (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
             (X86cmpm_imm_commute timm:$cc))>;

  def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.BroadcastLdFrag addr:$src2),
                                            (_.VT _.RC:$src1),
                                            timm:$cc)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
             _.RC:$src1, addr:$src2,
             (X86cmpm_imm_commute timm:$cc))>;

  // Patterns for mask intrinsics.
  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc,
                      (_.KVT immAllOnesV)),
            (!cast<Instruction>(Name#_.ZSuffix#"rri") _.RC:$src1, _.RC:$src2, timm:$cc)>;

  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#"rrik") _.KRCWM:$mask, _.RC:$src1,
             _.RC:$src2, timm:$cc)>;

  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), timm:$cc,
                      (_.KVT immAllOnesV)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2, timm:$cc)>;

  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), timm:$cc,
                      _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask, _.RC:$src1,
             addr:$src2, timm:$cc)>;

  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc,
                      (_.KVT immAllOnesV)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2, timm:$cc)>;

  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc,
                      _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask, _.RC:$src1,
             addr:$src2, timm:$cc)>;

  // Patterns for mask intrinsics with loads in the other operand.
  def : Pat<(X86cmpmm (_.VT (_.LdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
                      (_.KVT immAllOnesV)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
             (X86cmpm_imm_commute timm:$cc))>;

  def : Pat<(X86cmpmm (_.VT (_.LdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
                      _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
             _.RC:$src1, addr:$src2,
             (X86cmpm_imm_commute timm:$cc))>;

  def : Pat<(X86cmpmm (_.VT (_.BroadcastLdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
                      (_.KVT immAllOnesV)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
             (X86cmpm_imm_commute timm:$cc))>;

  def : Pat<(X86cmpmm (_.VT (_.BroadcastLdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
                      _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
             _.RC:$src1, addr:$src2,
             (X86cmpm_imm_commute timm:$cc))>;
}
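// Illustrative only (AT&T syntax): the packed compare forms defined above
// print as, e.g.:
//   vcmpps $1, (%rdi){1to16}, %zmm0, %k1    ; broadcast operand, "rmbi"
//   vcmpltps %zmm1, %zmm0, %k1 {%k2}        ; masked register compare, "rrik"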
multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  // Comparison code form (VCMP[EQ/LT/LE/...]).
  let Uses = [MXCSR] in
  defm rrib : AVX512_maskable_custom_cmp<0xC2, MRMSrcReg, (outs _.KRC:$dst),
              (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
              (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2, u8imm:$cc),
              "vcmp"#_.Suffix,
              "$cc, {sae}, $src2, $src1",
              "$src1, $src2, {sae}, $cc",
              [(set _.KRC:$dst, (X86cmpmmSAE (_.VT _.RC:$src1),
                                 (_.VT _.RC:$src2), timm:$cc, (_.KVT immAllOnesV)))],
              [(set _.KRC:$dst, (X86cmpmmSAE (_.VT _.RC:$src1),
                                 (_.VT _.RC:$src2), timm:$cc, _.KRCWM:$mask))]>,
              EVEX_B, Sched<[sched]>;
}

multiclass avx512_vcmp<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcmp_common<sched.ZMM, _.info512, NAME>,
             avx512_vcmp_sae<sched.ZMM, _.info512>, EVEX_V512;
  }
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_vcmp_common<sched.XMM, _.info128, NAME>, EVEX_V128;
    defm Z256 : avx512_vcmp_common<sched.YMM, _.info256, NAME>, EVEX_V256;
  }
}

defm VCMPPD : avx512_vcmp<SchedWriteFCmp, avx512vl_f64_info>,
              AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VCMPPS : avx512_vcmp<SchedWriteFCmp, avx512vl_f32_info>,
              AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;

// Patterns to select fp compares with a load as the first operand.
let Predicates = [HasAVX512] in {
  def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1,
                            timm:$cc)),
            (VCMPSDZrm FR64X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;

  def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1,
                            timm:$cc)),
            (VCMPSSZrm FR32X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
}

// ----------------------------------------------------------------
// FPClass

def X86Vfpclasss_su : PatFrag<(ops node:$src1, node:$src2),
                              (X86Vfpclasss node:$src1, node:$src2), [{
  return N->hasOneUse();
}]>;

def X86Vfpclass_su : PatFrag<(ops node:$src1, node:$src2),
                             (X86Vfpclass node:$src1, node:$src2), [{
  return N->hasOneUse();
}]>;

// Handle the fpclass instruction: mask = op(reg_scalar, imm)
//                                        op(mem_scalar, imm)
multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                 Predicate prd> {
  let Predicates = [prd], ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
    def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                    (ins _.RC:$src1, i32u8imm:$src2),
                    OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.KRC:$dst, (X86Vfpclasss (_.VT _.RC:$src1),
                                       (i32 timm:$src2)))]>,
                    Sched<[sched]>;
    def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                     (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
                     OpcodeStr#_.Suffix#
                     "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                     [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                        (X86Vfpclasss_su (_.VT _.RC:$src1),
                                         (i32 timm:$src2))))]>,
                     EVEX_K, Sched<[sched]>;
    def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.IntScalarMemOp:$src1, i32u8imm:$src2),
                    OpcodeStr#_.Suffix#
                    "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.KRC:$dst,
                      (X86Vfpclasss (_.ScalarIntMemFrags addr:$src1),
                                    (i32 timm:$src2)))]>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
{${mask}}, $src1, $src2}", 2675 [(set _.KRC:$dst,(and _.KRCWM:$mask, 2676 (X86Vfpclasss_su (_.ScalarIntMemFrags addr:$src1), 2677 (i32 timm:$src2))))]>, 2678 EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>; 2679 } 2680} 2681 2682//handle fpclass instruction mask = fpclass(reg_vec, reg_vec, imm) 2683// fpclass(reg_vec, mem_vec, imm) 2684// fpclass(reg_vec, broadcast(eltVt), imm) 2685multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, 2686 X86FoldableSchedWrite sched, X86VectorVTInfo _, 2687 string mem>{ 2688 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in { 2689 def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst), 2690 (ins _.RC:$src1, i32u8imm:$src2), 2691 OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", 2692 [(set _.KRC:$dst,(X86Vfpclass (_.VT _.RC:$src1), 2693 (i32 timm:$src2)))]>, 2694 Sched<[sched]>; 2695 def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst), 2696 (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2), 2697 OpcodeStr#_.Suffix# 2698 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", 2699 [(set _.KRC:$dst,(and _.KRCWM:$mask, 2700 (X86Vfpclass_su (_.VT _.RC:$src1), 2701 (i32 timm:$src2))))]>, 2702 EVEX_K, Sched<[sched]>; 2703 def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst), 2704 (ins _.MemOp:$src1, i32u8imm:$src2), 2705 OpcodeStr#_.Suffix#"{"#mem#"}"# 2706 "\t{$src2, $src1, $dst|$dst, $src1, $src2}", 2707 [(set _.KRC:$dst,(X86Vfpclass 2708 (_.VT (_.LdFrag addr:$src1)), 2709 (i32 timm:$src2)))]>, 2710 Sched<[sched.Folded, sched.ReadAfterFold]>; 2711 def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst), 2712 (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2), 2713 OpcodeStr#_.Suffix#"{"#mem#"}"# 2714 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", 2715 [(set _.KRC:$dst, (and _.KRCWM:$mask, (X86Vfpclass_su 2716 (_.VT (_.LdFrag addr:$src1)), 2717 (i32 timm:$src2))))]>, 2718 EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>; 2719 def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst), 2720 (ins _.ScalarMemOp:$src1, i32u8imm:$src2), 2721 OpcodeStr#_.Suffix#"\t{$src2, ${src1}"# 2722 _.BroadcastStr#", $dst|$dst, ${src1}" 2723 #_.BroadcastStr#", $src2}", 2724 [(set _.KRC:$dst,(X86Vfpclass 2725 (_.VT (_.BroadcastLdFrag addr:$src1)), 2726 (i32 timm:$src2)))]>, 2727 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 2728 def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst), 2729 (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2), 2730 OpcodeStr#_.Suffix#"\t{$src2, ${src1}"# 2731 _.BroadcastStr#", $dst {${mask}}|$dst {${mask}}, ${src1}"# 2732 _.BroadcastStr#", $src2}", 2733 [(set _.KRC:$dst,(and _.KRCWM:$mask, (X86Vfpclass_su 2734 (_.VT (_.BroadcastLdFrag addr:$src1)), 2735 (i32 timm:$src2))))]>, 2736 EVEX_B, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>; 2737 } 2738 2739 // Allow registers or broadcast with the x, y, z suffix we use to disambiguate 2740 // the memory form. 
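// For example (AT&T syntax), "vfpclasspsz $1, %zmm0, %k1" is accepted as an
// alias for the register form "vfpclassps $1, %zmm0, %k1". The immediate is a
// bitmask selecting the FP classes to test: QNaN, +0, -0, +Inf, -Inf,
// denormal, negative finite, and SNaN (bits 0 through 7).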
2741 def : InstAlias<OpcodeStr#_.Suffix#mem# 2742 "\t{$src2, $src1, $dst|$dst, $src1, $src2}", 2743 (!cast<Instruction>(NAME#"rr") 2744 _.KRC:$dst, _.RC:$src1, i32u8imm:$src2), 0, "att">; 2745 def : InstAlias<OpcodeStr#_.Suffix#mem# 2746 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", 2747 (!cast<Instruction>(NAME#"rrk") 2748 _.KRC:$dst, _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2), 0, "att">; 2749 def : InstAlias<OpcodeStr#_.Suffix#mem# 2750 "\t{$src2, ${src1}"#_.BroadcastStr#", $dst|$dst, ${src1}"# 2751 _.BroadcastStr#", $src2}", 2752 (!cast<Instruction>(NAME#"rmb") 2753 _.KRC:$dst, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">; 2754 def : InstAlias<OpcodeStr#_.Suffix#mem# 2755 "\t{$src2, ${src1}"#_.BroadcastStr#", $dst {${mask}}|" 2756 "$dst {${mask}}, ${src1}"#_.BroadcastStr#", $src2}", 2757 (!cast<Instruction>(NAME#"rmbk") 2758 _.KRC:$dst, _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">; 2759} 2760 2761multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _, 2762 bits<8> opc, X86SchedWriteWidths sched, 2763 Predicate prd>{ 2764 let Predicates = [prd] in { 2765 defm Z : avx512_vector_fpclass<opc, OpcodeStr, sched.ZMM, 2766 _.info512, "z">, EVEX_V512; 2767 } 2768 let Predicates = [prd, HasVLX] in { 2769 defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, sched.XMM, 2770 _.info128, "x">, EVEX_V128; 2771 defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, sched.YMM, 2772 _.info256, "y">, EVEX_V256; 2773 } 2774} 2775 2776multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec, 2777 bits<8> opcScalar, X86SchedWriteWidths sched, 2778 Predicate prd> { 2779 defm PS : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f32_info, opcVec, 2780 sched, prd>, 2781 EVEX_CD8<32, CD8VF>; 2782 defm PD : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f64_info, opcVec, 2783 sched, prd>, 2784 EVEX_CD8<64, CD8VF> , VEX_W; 2785 defm SSZ : avx512_scalar_fpclass<opcScalar, OpcodeStr, 2786 sched.Scl, f32x_info, prd>, VEX_LIG, 2787 EVEX_CD8<32, CD8VT1>; 2788 defm SDZ : avx512_scalar_fpclass<opcScalar, OpcodeStr, 2789 sched.Scl, f64x_info, prd>, VEX_LIG, 2790 EVEX_CD8<64, CD8VT1>, VEX_W; 2791} 2792 2793defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, SchedWriteFCmp, 2794 HasDQI>, AVX512AIi8Base, EVEX; 2795 2796//----------------------------------------------------------------- 2797// Mask register copy, including 2798// - copy between mask registers 2799// - load/store mask registers 2800// - copy from GPR to mask register and vice versa 2801// 2802multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk, 2803 string OpcodeStr, RegisterClass KRC, 2804 ValueType vvt, X86MemOperand x86memop> { 2805 let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove] in 2806 def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src), 2807 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>, 2808 Sched<[WriteMove]>; 2809 def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src), 2810 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 2811 [(set KRC:$dst, (vvt (load addr:$src)))]>, 2812 Sched<[WriteLoad]>; 2813 def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src), 2814 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 2815 [(store KRC:$src, addr:$dst)]>, 2816 Sched<[WriteStore]>; 2817} 2818 2819multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk, 2820 string OpcodeStr, 2821 RegisterClass KRC, RegisterClass GRC> { 2822 let hasSideEffects = 0 in { 2823 def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), 
(ins GRC:$src), 2824 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>, 2825 Sched<[WriteMove]>; 2826 def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src), 2827 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>, 2828 Sched<[WriteMove]>; 2829 } 2830} 2831 2832let Predicates = [HasDQI] in 2833 defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>, 2834 avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>, 2835 VEX, PD; 2836 2837let Predicates = [HasAVX512] in 2838 defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>, 2839 avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>, 2840 VEX, PS; 2841 2842let Predicates = [HasBWI] in { 2843 defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem>, 2844 VEX, PD, VEX_W; 2845 defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>, 2846 VEX, XD; 2847 defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>, 2848 VEX, PS, VEX_W; 2849 defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>, 2850 VEX, XD, VEX_W; 2851} 2852 2853// GR from/to mask register 2854def : Pat<(v16i1 (bitconvert (i16 GR16:$src))), 2855 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>; 2856def : Pat<(i16 (bitconvert (v16i1 VK16:$src))), 2857 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>; 2858def : Pat<(i8 (trunc (i16 (bitconvert (v16i1 VK16:$src))))), 2859 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_8bit)>; 2860 2861def : Pat<(v8i1 (bitconvert (i8 GR8:$src))), 2862 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>; 2863def : Pat<(i8 (bitconvert (v8i1 VK8:$src))), 2864 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit)>; 2865 2866def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))), 2867 (KMOVWrk VK16:$src)>; 2868def : Pat<(i64 (zext (i16 (bitconvert (v16i1 VK16:$src))))), 2869 (SUBREG_TO_REG (i64 0), (KMOVWrk VK16:$src), sub_32bit)>; 2870def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))), 2871 (COPY_TO_REGCLASS VK16:$src, GR32)>; 2872def : Pat<(i64 (anyext (i16 (bitconvert (v16i1 VK16:$src))))), 2873 (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK16:$src, GR32), sub_32bit)>; 2874 2875def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))), 2876 (KMOVBrk VK8:$src)>, Requires<[HasDQI]>; 2877def : Pat<(i64 (zext (i8 (bitconvert (v8i1 VK8:$src))))), 2878 (SUBREG_TO_REG (i64 0), (KMOVBrk VK8:$src), sub_32bit)>, Requires<[HasDQI]>; 2879def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))), 2880 (COPY_TO_REGCLASS VK8:$src, GR32)>; 2881def : Pat<(i64 (anyext (i8 (bitconvert (v8i1 VK8:$src))))), 2882 (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK8:$src, GR32), sub_32bit)>; 2883 2884def : Pat<(v32i1 (bitconvert (i32 GR32:$src))), 2885 (COPY_TO_REGCLASS GR32:$src, VK32)>; 2886def : Pat<(i32 (bitconvert (v32i1 VK32:$src))), 2887 (COPY_TO_REGCLASS VK32:$src, GR32)>; 2888def : Pat<(v64i1 (bitconvert (i64 GR64:$src))), 2889 (COPY_TO_REGCLASS GR64:$src, VK64)>; 2890def : Pat<(i64 (bitconvert (v64i1 VK64:$src))), 2891 (COPY_TO_REGCLASS VK64:$src, GR64)>; 2892 2893// Load/store kreg 2894let Predicates = [HasDQI] in { 2895 def : Pat<(v1i1 (load addr:$src)), 2896 (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>; 2897 def : Pat<(v2i1 (load addr:$src)), 2898 (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>; 2899 def : Pat<(v4i1 (load addr:$src)), 2900 (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>; 2901} 2902 2903let Predicates = 
[HasAVX512] in { 2904 def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))), 2905 (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>; 2906 def : Pat<(v16i1 (bitconvert (loadi16 addr:$src))), 2907 (KMOVWkm addr:$src)>; 2908} 2909 2910def X86kextract : SDNode<"ISD::EXTRACT_VECTOR_ELT", 2911 SDTypeProfile<1, 2, [SDTCisVT<0, i8>, 2912 SDTCVecEltisVT<1, i1>, 2913 SDTCisPtrTy<2>]>>; 2914 2915let Predicates = [HasAVX512] in { 2916 multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC, ValueType maskVT> { 2917 def : Pat<(maskVT (scalar_to_vector GR32:$src)), 2918 (COPY_TO_REGCLASS GR32:$src, maskRC)>; 2919 2920 def : Pat<(maskVT (scalar_to_vector GR8:$src)), 2921 (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>; 2922 2923 def : Pat<(i8 (X86kextract maskRC:$src, (iPTR 0))), 2924 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS maskRC:$src, GR32)), sub_8bit)>; 2925 2926 def : Pat<(i32 (anyext (i8 (X86kextract maskRC:$src, (iPTR 0))))), 2927 (i32 (COPY_TO_REGCLASS maskRC:$src, GR32))>; 2928 } 2929 2930 defm : operation_gpr_mask_copy_lowering<VK1, v1i1>; 2931 defm : operation_gpr_mask_copy_lowering<VK2, v2i1>; 2932 defm : operation_gpr_mask_copy_lowering<VK4, v4i1>; 2933 defm : operation_gpr_mask_copy_lowering<VK8, v8i1>; 2934 defm : operation_gpr_mask_copy_lowering<VK16, v16i1>; 2935 defm : operation_gpr_mask_copy_lowering<VK32, v32i1>; 2936 defm : operation_gpr_mask_copy_lowering<VK64, v64i1>; 2937 2938 def : Pat<(insert_subvector (v16i1 immAllZerosV), 2939 (v1i1 (scalar_to_vector GR8:$src)), (iPTR 0)), 2940 (KMOVWkr (AND32ri8 2941 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), 2942 (i32 1)))>; 2943} 2944 2945// Mask unary operation 2946// - KNOT 2947multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr, 2948 RegisterClass KRC, SDPatternOperator OpNode, 2949 X86FoldableSchedWrite sched, Predicate prd> { 2950 let Predicates = [prd] in 2951 def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src), 2952 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 2953 [(set KRC:$dst, (OpNode KRC:$src))]>, 2954 Sched<[sched]>; 2955} 2956 2957multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr, 2958 SDPatternOperator OpNode, 2959 X86FoldableSchedWrite sched> { 2960 defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode, 2961 sched, HasDQI>, VEX, PD; 2962 defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode, 2963 sched, HasAVX512>, VEX, PS; 2964 defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode, 2965 sched, HasBWI>, VEX, PD, VEX_W; 2966 defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode, 2967 sched, HasBWI>, VEX, PS, VEX_W; 2968} 2969 2970// TODO - do we need a X86SchedWriteWidths::KMASK type? 
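// knot complements every bit of the source mask, e.g. (AT&T syntax)
// "knotw %k1, %k2" computes k2 = ~k1 across all 16 bits.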
defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SchedWriteVecLogic.XMM>;

// KNL does not support KMOVB, so an 8-bit mask is promoted to 16-bit.
let Predicates = [HasAVX512, NoDQI] in
def : Pat<(vnot VK8:$src),
          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;

def : Pat<(vnot VK4:$src),
          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>;
def : Pat<(vnot VK2:$src),
          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>;
def : Pat<(vnot VK1:$src),
          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK1:$src, VK16)), VK1)>;

// Mask binary operation
// - KAND, KANDN, KOR, KXNOR, KXOR
multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
                             RegisterClass KRC, SDPatternOperator OpNode,
                             X86FoldableSchedWrite sched, Predicate prd,
                             bit IsCommutable> {
  let Predicates = [prd], isCommutable = IsCommutable in
  def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
             !strconcat(OpcodeStr,
                        "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>,
             Sched<[sched]>;
}

multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
                                 SDPatternOperator OpNode,
                                 X86FoldableSchedWrite sched, bit IsCommutable,
                                 Predicate prdW = HasAVX512> {
  defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
                             sched, HasDQI, IsCommutable>, VEX_4V, VEX_L, PD;
  defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
                             sched, prdW, IsCommutable>, VEX_4V, VEX_L, PS;
  defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
                             sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PD;
  defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
                             sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PS;
}

// These nodes use 'vnot' instead of 'not' to support vectors.
def vandn : PatFrag<(ops node:$i0, node:$i1), (and (vnot node:$i0), node:$i1)>;
def vxnor : PatFrag<(ops node:$i0, node:$i1), (vnot (xor node:$i0, node:$i1))>;

// TODO - do we need a X86SchedWriteWidths::KMASK type?
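// Note kandn computes (~src1 & src2) and kxnor computes ~(src1 ^ src2),
// mirroring the vandn/vxnor fragments above; "kxnorw %k0, %k0, %k0" is the
// idiomatic way to materialize an all-ones mask.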
3018defm KAND : avx512_mask_binop_all<0x41, "kand", and, SchedWriteVecLogic.XMM, 1>; 3019defm KOR : avx512_mask_binop_all<0x45, "kor", or, SchedWriteVecLogic.XMM, 1>; 3020defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor, SchedWriteVecLogic.XMM, 1>; 3021defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor, SchedWriteVecLogic.XMM, 1>; 3022defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn, SchedWriteVecLogic.XMM, 0>; 3023defm KADD : avx512_mask_binop_all<0x4A, "kadd", X86kadd, SchedWriteVecLogic.XMM, 1, HasDQI>; 3024 3025multiclass avx512_binop_pat<SDPatternOperator VOpNode, 3026 Instruction Inst> { 3027 // With AVX512F, 8-bit mask is promoted to 16-bit mask, 3028 // for the DQI set, this type is legal and KxxxB instruction is used 3029 let Predicates = [NoDQI] in 3030 def : Pat<(VOpNode VK8:$src1, VK8:$src2), 3031 (COPY_TO_REGCLASS 3032 (Inst (COPY_TO_REGCLASS VK8:$src1, VK16), 3033 (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>; 3034 3035 // All types smaller than 8 bits require conversion anyway 3036 def : Pat<(VOpNode VK1:$src1, VK1:$src2), 3037 (COPY_TO_REGCLASS (Inst 3038 (COPY_TO_REGCLASS VK1:$src1, VK16), 3039 (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>; 3040 def : Pat<(VOpNode VK2:$src1, VK2:$src2), 3041 (COPY_TO_REGCLASS (Inst 3042 (COPY_TO_REGCLASS VK2:$src1, VK16), 3043 (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>; 3044 def : Pat<(VOpNode VK4:$src1, VK4:$src2), 3045 (COPY_TO_REGCLASS (Inst 3046 (COPY_TO_REGCLASS VK4:$src1, VK16), 3047 (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>; 3048} 3049 3050defm : avx512_binop_pat<and, KANDWrr>; 3051defm : avx512_binop_pat<vandn, KANDNWrr>; 3052defm : avx512_binop_pat<or, KORWrr>; 3053defm : avx512_binop_pat<vxnor, KXNORWrr>; 3054defm : avx512_binop_pat<xor, KXORWrr>; 3055 3056// Mask unpacking 3057multiclass avx512_mask_unpck<string Suffix, X86KVectorVTInfo Dst, 3058 X86KVectorVTInfo Src, X86FoldableSchedWrite sched, 3059 Predicate prd> { 3060 let Predicates = [prd] in { 3061 let hasSideEffects = 0 in 3062 def rr : I<0x4b, MRMSrcReg, (outs Dst.KRC:$dst), 3063 (ins Src.KRC:$src1, Src.KRC:$src2), 3064 "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, 3065 VEX_4V, VEX_L, Sched<[sched]>; 3066 3067 def : Pat<(Dst.KVT (concat_vectors Src.KRC:$src1, Src.KRC:$src2)), 3068 (!cast<Instruction>(NAME#rr) Src.KRC:$src2, Src.KRC:$src1)>; 3069 } 3070} 3071 3072defm KUNPCKBW : avx512_mask_unpck<"bw", v16i1_info, v8i1_info, WriteShuffle, HasAVX512>, PD; 3073defm KUNPCKWD : avx512_mask_unpck<"wd", v32i1_info, v16i1_info, WriteShuffle, HasBWI>, PS; 3074defm KUNPCKDQ : avx512_mask_unpck<"dq", v64i1_info, v32i1_info, WriteShuffle, HasBWI>, PS, VEX_W; 3075 3076// Mask bit testing 3077multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC, 3078 SDNode OpNode, X86FoldableSchedWrite sched, 3079 Predicate prd> { 3080 let Predicates = [prd], Defs = [EFLAGS] in 3081 def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2), 3082 !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), 3083 [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>, 3084 Sched<[sched]>; 3085} 3086 3087multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode, 3088 X86FoldableSchedWrite sched, 3089 Predicate prdW = HasAVX512> { 3090 defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, sched, HasDQI>, 3091 VEX, PD; 3092 defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, sched, prdW>, 3093 VEX, PS; 3094 defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, sched, HasBWI>, 3095 VEX, PS, VEX_W; 3096 defm D : 
avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, sched, HasBWI>, 3097 VEX, PD, VEX_W; 3098} 3099 3100// TODO - do we need a X86SchedWriteWidths::KMASK type? 3101defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SchedWriteVecLogic.XMM>; 3102defm KTEST : avx512_mask_testop_w<0x99, "ktest", X86ktest, SchedWriteVecLogic.XMM, HasDQI>; 3103 3104// Mask shift 3105multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC, 3106 SDNode OpNode, X86FoldableSchedWrite sched> { 3107 let Predicates = [HasAVX512] in 3108 def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm), 3109 !strconcat(OpcodeStr, 3110 "\t{$imm, $src, $dst|$dst, $src, $imm}"), 3111 [(set KRC:$dst, (OpNode KRC:$src, (i8 timm:$imm)))]>, 3112 Sched<[sched]>; 3113} 3114 3115multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr, 3116 SDNode OpNode, X86FoldableSchedWrite sched> { 3117 defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode, 3118 sched>, VEX, TAPD, VEX_W; 3119 let Predicates = [HasDQI] in 3120 defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode, 3121 sched>, VEX, TAPD; 3122 let Predicates = [HasBWI] in { 3123 defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode, 3124 sched>, VEX, TAPD, VEX_W; 3125 defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode, 3126 sched>, VEX, TAPD; 3127 } 3128} 3129 3130defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, WriteShuffle>; 3131defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, WriteShuffle>; 3132 3133// Patterns for comparing 128/256-bit integer vectors using 512-bit instruction. 3134multiclass axv512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su, 3135 string InstStr, 3136 X86VectorVTInfo Narrow, 3137 X86VectorVTInfo Wide> { 3138def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1), 3139 (Narrow.VT Narrow.RC:$src2), cond)), 3140 (COPY_TO_REGCLASS 3141 (!cast<Instruction>(InstStr#"Zrri") 3142 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), 3143 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)), 3144 (Frag.OperandTransform $cc)), Narrow.KRC)>; 3145 3146def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, 3147 (Narrow.KVT (Frag_su:$cc (Narrow.VT Narrow.RC:$src1), 3148 (Narrow.VT Narrow.RC:$src2), 3149 cond)))), 3150 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik") 3151 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), 3152 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), 3153 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)), 3154 (Frag_su.OperandTransform $cc)), Narrow.KRC)>; 3155} 3156 3157multiclass axv512_icmp_packed_cc_rmb_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su, 3158 PatFrag CommFrag, PatFrag CommFrag_su, 3159 string InstStr, 3160 X86VectorVTInfo Narrow, 3161 X86VectorVTInfo Wide> { 3162// Broadcast load. 
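// e.g. a 256-bit integer compare against a scalar broadcast from memory is
// widened to the 512-bit instruction with an embedded broadcast ({1to16} for
// dword elements), and the narrow result mask is extracted from the wide one.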
3163def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1), 3164 (Narrow.BroadcastLdFrag addr:$src2), cond)), 3165 (COPY_TO_REGCLASS 3166 (!cast<Instruction>(InstStr#"Zrmib") 3167 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), 3168 addr:$src2, (Frag.OperandTransform $cc)), Narrow.KRC)>; 3169 3170def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, 3171 (Narrow.KVT 3172 (Frag_su:$cc (Narrow.VT Narrow.RC:$src1), 3173 (Narrow.BroadcastLdFrag addr:$src2), 3174 cond)))), 3175 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk") 3176 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), 3177 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), 3178 addr:$src2, (Frag_su.OperandTransform $cc)), Narrow.KRC)>; 3179 3180// Commuted with broadcast load. 3181def : Pat<(Narrow.KVT (CommFrag:$cc (Narrow.BroadcastLdFrag addr:$src2), 3182 (Narrow.VT Narrow.RC:$src1), 3183 cond)), 3184 (COPY_TO_REGCLASS 3185 (!cast<Instruction>(InstStr#"Zrmib") 3186 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), 3187 addr:$src2, (CommFrag.OperandTransform $cc)), Narrow.KRC)>; 3188 3189def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, 3190 (Narrow.KVT 3191 (CommFrag_su:$cc (Narrow.BroadcastLdFrag addr:$src2), 3192 (Narrow.VT Narrow.RC:$src1), 3193 cond)))), 3194 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk") 3195 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), 3196 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), 3197 addr:$src2, (CommFrag_su.OperandTransform $cc)), Narrow.KRC)>; 3198} 3199 3200// Same as above, but for fp types which don't use PatFrags. 3201multiclass axv512_cmp_packed_cc_no_vlx_lowering<string InstStr, 3202 X86VectorVTInfo Narrow, 3203 X86VectorVTInfo Wide> { 3204def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1), 3205 (Narrow.VT Narrow.RC:$src2), timm:$cc)), 3206 (COPY_TO_REGCLASS 3207 (!cast<Instruction>(InstStr#"Zrri") 3208 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), 3209 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)), 3210 timm:$cc), Narrow.KRC)>; 3211 3212def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, 3213 (X86cmpm_su (Narrow.VT Narrow.RC:$src1), 3214 (Narrow.VT Narrow.RC:$src2), timm:$cc))), 3215 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik") 3216 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), 3217 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), 3218 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)), 3219 timm:$cc), Narrow.KRC)>; 3220 3221// Broadcast load. 3222def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1), 3223 (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc)), 3224 (COPY_TO_REGCLASS 3225 (!cast<Instruction>(InstStr#"Zrmbi") 3226 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), 3227 addr:$src2, timm:$cc), Narrow.KRC)>; 3228 3229def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, 3230 (X86cmpm_su (Narrow.VT Narrow.RC:$src1), 3231 (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc))), 3232 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik") 3233 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), 3234 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), 3235 addr:$src2, timm:$cc), Narrow.KRC)>; 3236 3237// Commuted with broadcast load. 
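// When the broadcast appears as the first operand, the operands are swapped so
// the memory operand can still be folded, and X86cmpm_imm_commute rewrites the
// predicate to match (e.g. LT_OS becomes GT_OS).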
3238def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), 3239 (Narrow.VT Narrow.RC:$src1), timm:$cc)), 3240 (COPY_TO_REGCLASS 3241 (!cast<Instruction>(InstStr#"Zrmbi") 3242 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), 3243 addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>; 3244 3245def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, 3246 (X86cmpm_su (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), 3247 (Narrow.VT Narrow.RC:$src1), timm:$cc))), 3248 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik") 3249 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), 3250 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), 3251 addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>; 3252} 3253 3254let Predicates = [HasAVX512, NoVLX] in { 3255 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>; 3256 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>; 3257 3258 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>; 3259 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>; 3260 3261 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>; 3262 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>; 3263 3264 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>; 3265 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>; 3266 3267 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPD", v8i32x_info, v16i32_info>; 3268 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUD", v8i32x_info, v16i32_info>; 3269 3270 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPD", v4i32x_info, v16i32_info>; 3271 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUD", v4i32x_info, v16i32_info>; 3272 3273 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPQ", v4i64x_info, v8i64_info>; 3274 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUQ", v4i64x_info, v8i64_info>; 3275 3276 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPQ", v2i64x_info, v8i64_info>; 3277 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUQ", v2i64x_info, v8i64_info>; 3278 3279 defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v8f32x_info, v16f32_info>; 3280 defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v4f32x_info, v16f32_info>; 3281 defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v4f64x_info, v8f64_info>; 3282 defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v2f64x_info, v8f64_info>; 3283} 3284 3285let Predicates = [HasBWI, NoVLX] in { 3286 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v32i8x_info, v64i8_info>; 3287 defm : 
axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v32i8x_info, v64i8_info>; 3288 3289 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v16i8x_info, v64i8_info>; 3290 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v16i8x_info, v64i8_info>; 3291 3292 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v16i16x_info, v32i16_info>; 3293 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v16i16x_info, v32i16_info>; 3294 3295 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v8i16x_info, v32i16_info>; 3296 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v8i16x_info, v32i16_info>; 3297} 3298 3299// Mask setting all 0s or 1s 3300multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> { 3301 let Predicates = [HasAVX512] in 3302 let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1, 3303 SchedRW = [WriteZero] in 3304 def NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "", 3305 [(set KRC:$dst, (VT Val))]>; 3306} 3307 3308multiclass avx512_mask_setop_w<PatFrag Val> { 3309 defm W : avx512_mask_setop<VK16, v16i1, Val>; 3310 defm D : avx512_mask_setop<VK32, v32i1, Val>; 3311 defm Q : avx512_mask_setop<VK64, v64i1, Val>; 3312} 3313 3314defm KSET0 : avx512_mask_setop_w<immAllZerosV>; 3315defm KSET1 : avx512_mask_setop_w<immAllOnesV>; 3316 3317// With AVX-512 only, 8-bit mask is promoted to 16-bit mask. 3318let Predicates = [HasAVX512] in { 3319 def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>; 3320 def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>; 3321 def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>; 3322 def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>; 3323 def : Pat<(v8i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK8)>; 3324 def : Pat<(v4i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK4)>; 3325 def : Pat<(v2i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK2)>; 3326 def : Pat<(v1i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK1)>; 3327} 3328 3329// Patterns for kmask insert_subvector/extract_subvector to/from index=0 3330multiclass operation_subvector_mask_lowering<RegisterClass subRC, ValueType subVT, 3331 RegisterClass RC, ValueType VT> { 3332 def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))), 3333 (subVT (COPY_TO_REGCLASS RC:$src, subRC))>; 3334 3335 def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))), 3336 (VT (COPY_TO_REGCLASS subRC:$src, RC))>; 3337} 3338defm : operation_subvector_mask_lowering<VK1, v1i1, VK2, v2i1>; 3339defm : operation_subvector_mask_lowering<VK1, v1i1, VK4, v4i1>; 3340defm : operation_subvector_mask_lowering<VK1, v1i1, VK8, v8i1>; 3341defm : operation_subvector_mask_lowering<VK1, v1i1, VK16, v16i1>; 3342defm : operation_subvector_mask_lowering<VK1, v1i1, VK32, v32i1>; 3343defm : operation_subvector_mask_lowering<VK1, v1i1, VK64, v64i1>; 3344 3345defm : operation_subvector_mask_lowering<VK2, v2i1, VK4, v4i1>; 3346defm : operation_subvector_mask_lowering<VK2, v2i1, VK8, v8i1>; 3347defm : operation_subvector_mask_lowering<VK2, v2i1, VK16, v16i1>; 3348defm : operation_subvector_mask_lowering<VK2, v2i1, VK32, v32i1>; 3349defm : operation_subvector_mask_lowering<VK2, v2i1, VK64, v64i1>; 3350 3351defm : operation_subvector_mask_lowering<VK4, v4i1, VK8, v8i1>; 3352defm : operation_subvector_mask_lowering<VK4, v4i1, VK16, v16i1>; 3353defm : operation_subvector_mask_lowering<VK4, 
v4i1, VK32, v32i1>; 3354defm : operation_subvector_mask_lowering<VK4, v4i1, VK64, v64i1>; 3355 3356defm : operation_subvector_mask_lowering<VK8, v8i1, VK16, v16i1>; 3357defm : operation_subvector_mask_lowering<VK8, v8i1, VK32, v32i1>; 3358defm : operation_subvector_mask_lowering<VK8, v8i1, VK64, v64i1>; 3359 3360defm : operation_subvector_mask_lowering<VK16, v16i1, VK32, v32i1>; 3361defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>; 3362 3363defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>; 3364 3365//===----------------------------------------------------------------------===// 3366// AVX-512 - Aligned and unaligned load and store 3367// 3368 3369multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name, 3370 X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload, 3371 X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd, 3372 bit NoRMPattern = 0, 3373 SDPatternOperator SelectOprr = vselect> { 3374 let hasSideEffects = 0 in { 3375 let isMoveReg = 1 in 3376 def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src), 3377 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [], 3378 _.ExeDomain>, EVEX, Sched<[Sched.RR]>, 3379 EVEX2VEXOverride<EVEX2VEXOvrd#"rr">; 3380 def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), 3381 (ins _.KRCWM:$mask, _.RC:$src), 3382 !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|", 3383 "${dst} {${mask}} {z}, $src}"), 3384 [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask, 3385 (_.VT _.RC:$src), 3386 _.ImmAllZerosV)))], _.ExeDomain>, 3387 EVEX, EVEX_KZ, Sched<[Sched.RR]>; 3388 3389 let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1 in 3390 def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src), 3391 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 3392 !if(NoRMPattern, [], 3393 [(set _.RC:$dst, 3394 (_.VT (ld_frag addr:$src)))]), 3395 _.ExeDomain>, EVEX, Sched<[Sched.RM]>, 3396 EVEX2VEXOverride<EVEX2VEXOvrd#"rm">; 3397 3398 let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in { 3399 def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), 3400 (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1), 3401 !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|", 3402 "${dst} {${mask}}, $src1}"), 3403 [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask, 3404 (_.VT _.RC:$src1), 3405 (_.VT _.RC:$src0))))], _.ExeDomain>, 3406 EVEX, EVEX_K, Sched<[Sched.RR]>; 3407 def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), 3408 (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1), 3409 !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|", 3410 "${dst} {${mask}}, $src1}"), 3411 [(set _.RC:$dst, (_.VT 3412 (vselect_mask _.KRCWM:$mask, 3413 (_.VT (ld_frag addr:$src1)), 3414 (_.VT _.RC:$src0))))], _.ExeDomain>, 3415 EVEX, EVEX_K, Sched<[Sched.RM]>; 3416 } 3417 def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), 3418 (ins _.KRCWM:$mask, _.MemOp:$src), 3419 OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"# 3420 "${dst} {${mask}} {z}, $src}", 3421 [(set _.RC:$dst, (_.VT (vselect_mask _.KRCWM:$mask, 3422 (_.VT (ld_frag addr:$src)), _.ImmAllZerosV)))], 3423 _.ExeDomain>, EVEX, EVEX_KZ, Sched<[Sched.RM]>; 3424 } 3425 def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)), 3426 (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>; 3427 3428 def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)), 3429 (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>; 3430 3431 def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))), 3432 (!cast<Instruction>(Name#_.ZSuffix#rmk) _.RC:$src0, 
3433 _.KRCWM:$mask, addr:$ptr)>; 3434} 3435 3436multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr, 3437 AVX512VLVectorVTInfo _, Predicate prd, 3438 X86SchedWriteMoveLSWidths Sched, 3439 string EVEX2VEXOvrd, bit NoRMPattern = 0> { 3440 let Predicates = [prd] in 3441 defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512, 3442 _.info512.AlignedLdFrag, masked_load_aligned, 3443 Sched.ZMM, "", NoRMPattern>, EVEX_V512; 3444 3445 let Predicates = [prd, HasVLX] in { 3446 defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256, 3447 _.info256.AlignedLdFrag, masked_load_aligned, 3448 Sched.YMM, EVEX2VEXOvrd#"Y", NoRMPattern>, EVEX_V256; 3449 defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128, 3450 _.info128.AlignedLdFrag, masked_load_aligned, 3451 Sched.XMM, EVEX2VEXOvrd, NoRMPattern>, EVEX_V128; 3452 } 3453} 3454 3455multiclass avx512_load_vl<bits<8> opc, string OpcodeStr, 3456 AVX512VLVectorVTInfo _, Predicate prd, 3457 X86SchedWriteMoveLSWidths Sched, 3458 string EVEX2VEXOvrd, bit NoRMPattern = 0, 3459 SDPatternOperator SelectOprr = vselect> { 3460 let Predicates = [prd] in 3461 defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512, _.info512.LdFrag, 3462 masked_load, Sched.ZMM, "", 3463 NoRMPattern, SelectOprr>, EVEX_V512; 3464 3465 let Predicates = [prd, HasVLX] in { 3466 defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256, _.info256.LdFrag, 3467 masked_load, Sched.YMM, EVEX2VEXOvrd#"Y", 3468 NoRMPattern, SelectOprr>, EVEX_V256; 3469 defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128, _.info128.LdFrag, 3470 masked_load, Sched.XMM, EVEX2VEXOvrd, 3471 NoRMPattern, SelectOprr>, EVEX_V128; 3472 } 3473} 3474 3475multiclass avx512_store<bits<8> opc, string OpcodeStr, string BaseName, 3476 X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore, 3477 X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd, 3478 bit NoMRPattern = 0> { 3479 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in { 3480 let isMoveReg = 1 in 3481 def rr_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src), 3482 OpcodeStr # "\t{$src, $dst|$dst, $src}", 3483 [], _.ExeDomain>, EVEX, 3484 FoldGenData<BaseName#_.ZSuffix#rr>, Sched<[Sched.RR]>, 3485 EVEX2VEXOverride<EVEX2VEXOvrd#"rr_REV">; 3486 def rrk_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), 3487 (ins _.KRCWM:$mask, _.RC:$src), 3488 OpcodeStr # "\t{$src, ${dst} {${mask}}|"# 3489 "${dst} {${mask}}, $src}", 3490 [], _.ExeDomain>, EVEX, EVEX_K, 3491 FoldGenData<BaseName#_.ZSuffix#rrk>, 3492 Sched<[Sched.RR]>; 3493 def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), 3494 (ins _.KRCWM:$mask, _.RC:$src), 3495 OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" # 3496 "${dst} {${mask}} {z}, $src}", 3497 [], _.ExeDomain>, EVEX, EVEX_KZ, 3498 FoldGenData<BaseName#_.ZSuffix#rrkz>, 3499 Sched<[Sched.RR]>; 3500 } 3501 3502 let hasSideEffects = 0, mayStore = 1 in 3503 def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src), 3504 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 3505 !if(NoMRPattern, [], 3506 [(st_frag (_.VT _.RC:$src), addr:$dst)]), 3507 _.ExeDomain>, EVEX, Sched<[Sched.MR]>, 3508 EVEX2VEXOverride<EVEX2VEXOvrd#"mr">; 3509 def mrk : AVX512PI<opc, MRMDestMem, (outs), 3510 (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src), 3511 OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}", 3512 [], _.ExeDomain>, EVEX, EVEX_K, Sched<[Sched.MR]>, 3513 NotMemoryFoldable; 3514 3515 def: Pat<(mstore (_.VT _.RC:$src), addr:$ptr, _.KRCWM:$mask), 3516 (!cast<Instruction>(BaseName#_.ZSuffix#mrk) addr:$ptr, 3517 _.KRCWM:$mask, 
_.RC:$src)>; 3518 3519 def : InstAlias<OpcodeStr#".s\t{$src, $dst|$dst, $src}", 3520 (!cast<Instruction>(BaseName#_.ZSuffix#"rr_REV") 3521 _.RC:$dst, _.RC:$src), 0>; 3522 def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}", 3523 (!cast<Instruction>(BaseName#_.ZSuffix#"rrk_REV") 3524 _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>; 3525 def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}", 3526 (!cast<Instruction>(BaseName#_.ZSuffix#"rrkz_REV") 3527 _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>; 3528} 3529 3530multiclass avx512_store_vl< bits<8> opc, string OpcodeStr, 3531 AVX512VLVectorVTInfo _, Predicate prd, 3532 X86SchedWriteMoveLSWidths Sched, 3533 string EVEX2VEXOvrd, bit NoMRPattern = 0> { 3534 let Predicates = [prd] in 3535 defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, store, 3536 masked_store, Sched.ZMM, "", 3537 NoMRPattern>, EVEX_V512; 3538 let Predicates = [prd, HasVLX] in { 3539 defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, store, 3540 masked_store, Sched.YMM, 3541 EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256; 3542 defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, store, 3543 masked_store, Sched.XMM, EVEX2VEXOvrd, 3544 NoMRPattern>, EVEX_V128; 3545 } 3546} 3547 3548multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr, 3549 AVX512VLVectorVTInfo _, Predicate prd, 3550 X86SchedWriteMoveLSWidths Sched, 3551 string EVEX2VEXOvrd, bit NoMRPattern = 0> { 3552 let Predicates = [prd] in 3553 defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, alignedstore, 3554 masked_store_aligned, Sched.ZMM, "", 3555 NoMRPattern>, EVEX_V512; 3556 3557 let Predicates = [prd, HasVLX] in { 3558 defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, alignedstore, 3559 masked_store_aligned, Sched.YMM, 3560 EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256; 3561 defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, alignedstore, 3562 masked_store_aligned, Sched.XMM, EVEX2VEXOvrd, 3563 NoMRPattern>, EVEX_V128; 3564 } 3565} 3566 3567defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info, 3568 HasAVX512, SchedWriteFMoveLS, "VMOVAPS">, 3569 avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info, 3570 HasAVX512, SchedWriteFMoveLS, "VMOVAPS">, 3571 PS, EVEX_CD8<32, CD8VF>; 3572 3573defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info, 3574 HasAVX512, SchedWriteFMoveLS, "VMOVAPD">, 3575 avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info, 3576 HasAVX512, SchedWriteFMoveLS, "VMOVAPD">, 3577 PD, VEX_W, EVEX_CD8<64, CD8VF>; 3578 3579defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512, 3580 SchedWriteFMoveLS, "VMOVUPS", 0, null_frag>, 3581 avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512, 3582 SchedWriteFMoveLS, "VMOVUPS">, 3583 PS, EVEX_CD8<32, CD8VF>; 3584 3585defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512, 3586 SchedWriteFMoveLS, "VMOVUPD", 0, null_frag>, 3587 avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512, 3588 SchedWriteFMoveLS, "VMOVUPD">, 3589 PD, VEX_W, EVEX_CD8<64, CD8VF>; 3590 3591defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info, 3592 HasAVX512, SchedWriteVecMoveLS, 3593 "VMOVDQA", 1>, 3594 avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info, 3595 HasAVX512, SchedWriteVecMoveLS, 3596 "VMOVDQA", 1>, 3597 PD, EVEX_CD8<32, CD8VF>; 3598 3599defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info, 3600 HasAVX512, SchedWriteVecMoveLS, 3601 
"VMOVDQA">, 3602 avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info, 3603 HasAVX512, SchedWriteVecMoveLS, 3604 "VMOVDQA">, 3605 PD, VEX_W, EVEX_CD8<64, CD8VF>; 3606 3607defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI, 3608 SchedWriteVecMoveLS, "VMOVDQU", 1>, 3609 avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, HasBWI, 3610 SchedWriteVecMoveLS, "VMOVDQU", 1>, 3611 XD, EVEX_CD8<8, CD8VF>; 3612 3613defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI, 3614 SchedWriteVecMoveLS, "VMOVDQU", 1>, 3615 avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, HasBWI, 3616 SchedWriteVecMoveLS, "VMOVDQU", 1>, 3617 XD, VEX_W, EVEX_CD8<16, CD8VF>; 3618 3619defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512, 3620 SchedWriteVecMoveLS, "VMOVDQU", 1, null_frag>, 3621 avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, HasAVX512, 3622 SchedWriteVecMoveLS, "VMOVDQU", 1>, 3623 XS, EVEX_CD8<32, CD8VF>; 3624 3625defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512, 3626 SchedWriteVecMoveLS, "VMOVDQU", 0, null_frag>, 3627 avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, HasAVX512, 3628 SchedWriteVecMoveLS, "VMOVDQU">, 3629 XS, VEX_W, EVEX_CD8<64, CD8VF>; 3630 3631// Special instructions to help with spilling when we don't have VLX. We need 3632// to load or store from a ZMM register instead. These are converted in 3633// expandPostRAPseudos. 3634let isReMaterializable = 1, canFoldAsLoad = 1, 3635 isPseudo = 1, mayLoad = 1, hasSideEffects = 0 in { 3636def VMOVAPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src), 3637 "", []>, Sched<[WriteFLoadX]>; 3638def VMOVAPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src), 3639 "", []>, Sched<[WriteFLoadY]>; 3640def VMOVUPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src), 3641 "", []>, Sched<[WriteFLoadX]>; 3642def VMOVUPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src), 3643 "", []>, Sched<[WriteFLoadY]>; 3644} 3645 3646let isPseudo = 1, mayStore = 1, hasSideEffects = 0 in { 3647def VMOVAPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src), 3648 "", []>, Sched<[WriteFStoreX]>; 3649def VMOVAPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src), 3650 "", []>, Sched<[WriteFStoreY]>; 3651def VMOVUPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src), 3652 "", []>, Sched<[WriteFStoreX]>; 3653def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src), 3654 "", []>, Sched<[WriteFStoreY]>; 3655} 3656 3657def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 immAllZerosV), 3658 (v8i64 VR512:$src))), 3659 (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)), 3660 VK8), VR512:$src)>; 3661 3662def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV), 3663 (v16i32 VR512:$src))), 3664 (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>; 3665 3666// These patterns exist to prevent the above patterns from introducing a second 3667// mask inversion when one already exists. 
def : Pat<(v8i64 (vselect (xor VK8:$mask, (v8i1 immAllOnesV)),
                          (v8i64 immAllZerosV),
                          (v8i64 VR512:$src))),
          (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>;
def : Pat<(v16i32 (vselect (xor VK16:$mask, (v16i1 immAllOnesV)),
                           (v16i32 immAllZerosV),
                           (v16i32 VR512:$src))),
          (VMOVDQA32Zrrkz VK16:$mask, VR512:$src)>;

multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow,
                              X86VectorVTInfo Wide> {
  def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
                                Narrow.RC:$src1, Narrow.RC:$src0)),
            (EXTRACT_SUBREG
             (Wide.VT
              (!cast<Instruction>(InstrStr#"rrk")
               (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src0, Narrow.SubRegIdx)),
               (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
               (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
             Narrow.SubRegIdx)>;

  def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
                                Narrow.RC:$src1, Narrow.ImmAllZerosV)),
            (EXTRACT_SUBREG
             (Wide.VT
              (!cast<Instruction>(InstrStr#"rrkz")
               (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
               (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
             Narrow.SubRegIdx)>;
}

// Patterns for handling selects of 128/256-bit vectors when VLX isn't
// available. Use a 512-bit operation and extract.
let Predicates = [HasAVX512, NoVLX] in {
  defm : mask_move_lowering<"VMOVAPSZ", v4f32x_info, v16f32_info>;
  defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>;
  defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>;
  defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>;

  defm : mask_move_lowering<"VMOVAPDZ", v2f64x_info, v8f64_info>;
  defm : mask_move_lowering<"VMOVDQA64Z", v2i64x_info, v8i64_info>;
  defm : mask_move_lowering<"VMOVAPDZ", v4f64x_info, v8f64_info>;
  defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>;
}

let Predicates = [HasBWI, NoVLX] in {
  defm : mask_move_lowering<"VMOVDQU8Z", v16i8x_info, v64i8_info>;
  defm : mask_move_lowering<"VMOVDQU8Z", v32i8x_info, v64i8_info>;

  defm : mask_move_lowering<"VMOVDQU16Z", v8i16x_info, v32i16_info>;
  defm : mask_move_lowering<"VMOVDQU16Z", v16i16x_info, v32i16_info>;
}

let Predicates = [HasAVX512] in {
  // 512-bit load.
  def : Pat<(alignedloadv16i32 addr:$src),
            (VMOVDQA64Zrm addr:$src)>;
  def : Pat<(alignedloadv32i16 addr:$src),
            (VMOVDQA64Zrm addr:$src)>;
  def : Pat<(alignedloadv64i8 addr:$src),
            (VMOVDQA64Zrm addr:$src)>;
  def : Pat<(loadv16i32 addr:$src),
            (VMOVDQU64Zrm addr:$src)>;
  def : Pat<(loadv32i16 addr:$src),
            (VMOVDQU64Zrm addr:$src)>;
  def : Pat<(loadv64i8 addr:$src),
            (VMOVDQU64Zrm addr:$src)>;

  // 512-bit store.
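// As with the 512-bit loads above, stores of any integer element type can
// reuse the 64-bit-element instructions: without masking, the element size
// encoded in the mnemonic does not change the operation.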
3737 def : Pat<(alignedstore (v16i32 VR512:$src), addr:$dst), 3738 (VMOVDQA64Zmr addr:$dst, VR512:$src)>; 3739 def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst), 3740 (VMOVDQA64Zmr addr:$dst, VR512:$src)>; 3741 def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst), 3742 (VMOVDQA64Zmr addr:$dst, VR512:$src)>; 3743 def : Pat<(store (v16i32 VR512:$src), addr:$dst), 3744 (VMOVDQU64Zmr addr:$dst, VR512:$src)>; 3745 def : Pat<(store (v32i16 VR512:$src), addr:$dst), 3746 (VMOVDQU64Zmr addr:$dst, VR512:$src)>; 3747 def : Pat<(store (v64i8 VR512:$src), addr:$dst), 3748 (VMOVDQU64Zmr addr:$dst, VR512:$src)>; 3749} 3750 3751let Predicates = [HasVLX] in { 3752 // 128-bit load. 3753 def : Pat<(alignedloadv4i32 addr:$src), 3754 (VMOVDQA64Z128rm addr:$src)>; 3755 def : Pat<(alignedloadv8i16 addr:$src), 3756 (VMOVDQA64Z128rm addr:$src)>; 3757 def : Pat<(alignedloadv16i8 addr:$src), 3758 (VMOVDQA64Z128rm addr:$src)>; 3759 def : Pat<(loadv4i32 addr:$src), 3760 (VMOVDQU64Z128rm addr:$src)>; 3761 def : Pat<(loadv8i16 addr:$src), 3762 (VMOVDQU64Z128rm addr:$src)>; 3763 def : Pat<(loadv16i8 addr:$src), 3764 (VMOVDQU64Z128rm addr:$src)>; 3765 3766 // 128-bit store. 3767 def : Pat<(alignedstore (v4i32 VR128X:$src), addr:$dst), 3768 (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>; 3769 def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst), 3770 (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>; 3771 def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst), 3772 (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>; 3773 def : Pat<(store (v4i32 VR128X:$src), addr:$dst), 3774 (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>; 3775 def : Pat<(store (v8i16 VR128X:$src), addr:$dst), 3776 (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>; 3777 def : Pat<(store (v16i8 VR128X:$src), addr:$dst), 3778 (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>; 3779 3780 // 256-bit load. 3781 def : Pat<(alignedloadv8i32 addr:$src), 3782 (VMOVDQA64Z256rm addr:$src)>; 3783 def : Pat<(alignedloadv16i16 addr:$src), 3784 (VMOVDQA64Z256rm addr:$src)>; 3785 def : Pat<(alignedloadv32i8 addr:$src), 3786 (VMOVDQA64Z256rm addr:$src)>; 3787 def : Pat<(loadv8i32 addr:$src), 3788 (VMOVDQU64Z256rm addr:$src)>; 3789 def : Pat<(loadv16i16 addr:$src), 3790 (VMOVDQU64Z256rm addr:$src)>; 3791 def : Pat<(loadv32i8 addr:$src), 3792 (VMOVDQU64Z256rm addr:$src)>; 3793 3794 // 256-bit store. 
3795 def : Pat<(alignedstore (v8i32 VR256X:$src), addr:$dst), 3796 (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>; 3797 def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst), 3798 (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>; 3799 def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst), 3800 (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>; 3801 def : Pat<(store (v8i32 VR256X:$src), addr:$dst), 3802 (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>; 3803 def : Pat<(store (v16i16 VR256X:$src), addr:$dst), 3804 (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>; 3805 def : Pat<(store (v32i8 VR256X:$src), addr:$dst), 3806 (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>; 3807} 3808 3809// Move Int Doubleword to Packed Double Int 3810// 3811let ExeDomain = SSEPackedInt in { 3812def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src), 3813 "vmovd\t{$src, $dst|$dst, $src}", 3814 [(set VR128X:$dst, 3815 (v4i32 (scalar_to_vector GR32:$src)))]>, 3816 EVEX, Sched<[WriteVecMoveFromGpr]>; 3817def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src), 3818 "vmovd\t{$src, $dst|$dst, $src}", 3819 [(set VR128X:$dst, 3820 (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>, 3821 EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>; 3822def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src), 3823 "vmovq\t{$src, $dst|$dst, $src}", 3824 [(set VR128X:$dst, 3825 (v2i64 (scalar_to_vector GR64:$src)))]>, 3826 EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>; 3827let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in 3828def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), 3829 (ins i64mem:$src), 3830 "vmovq\t{$src, $dst|$dst, $src}", []>, 3831 EVEX, VEX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecLoad]>; 3832let isCodeGenOnly = 1 in { 3833def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src), 3834 "vmovq\t{$src, $dst|$dst, $src}", 3835 [(set FR64X:$dst, (bitconvert GR64:$src))]>, 3836 EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>; 3837def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src), 3838 "vmovq\t{$src, $dst|$dst, $src}", 3839 [(set GR64:$dst, (bitconvert FR64X:$src))]>, 3840 EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>; 3841} 3842} // ExeDomain = SSEPackedInt 3843 3844// Move Int Doubleword to Single Scalar 3845// 3846let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in { 3847def VMOVDI2SSZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src), 3848 "vmovd\t{$src, $dst|$dst, $src}", 3849 [(set FR32X:$dst, (bitconvert GR32:$src))]>, 3850 EVEX, Sched<[WriteVecMoveFromGpr]>; 3851} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1 3852 3853// Move doubleword from xmm register to r/m32 3854// 3855let ExeDomain = SSEPackedInt in { 3856def VMOVPDI2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src), 3857 "vmovd\t{$src, $dst|$dst, $src}", 3858 [(set GR32:$dst, (extractelt (v4i32 VR128X:$src), 3859 (iPTR 0)))]>, 3860 EVEX, Sched<[WriteVecMoveToGpr]>; 3861def VMOVPDI2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs), 3862 (ins i32mem:$dst, VR128X:$src), 3863 "vmovd\t{$src, $dst|$dst, $src}", 3864 [(store (i32 (extractelt (v4i32 VR128X:$src), 3865 (iPTR 0))), addr:$dst)]>, 3866 EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>; 3867} // ExeDomain = SSEPackedInt 3868 3869// Move quadword from xmm1 register to r/m64 3870// 3871let ExeDomain = SSEPackedInt in { 3872def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src), 3873 "vmovq\t{$src, $dst|$dst, $src}", 3874 [(set 
GR64:$dst, (extractelt (v2i64 VR128X:$src), 3875 (iPTR 0)))]>, 3876 PD, EVEX, VEX_W, Sched<[WriteVecMoveToGpr]>, 3877 Requires<[HasAVX512]>; 3878 3879let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in 3880def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src), 3881 "vmovq\t{$src, $dst|$dst, $src}", []>, PD, 3882 EVEX, VEX_W, Sched<[WriteVecStore]>, 3883 Requires<[HasAVX512, In64BitMode]>; 3884 3885def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs), 3886 (ins i64mem:$dst, VR128X:$src), 3887 "vmovq\t{$src, $dst|$dst, $src}", 3888 [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)), 3889 addr:$dst)]>, 3890 EVEX, PD, VEX_W, EVEX_CD8<64, CD8VT1>, 3891 Sched<[WriteVecStore]>, Requires<[HasAVX512]>; 3892 3893let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in 3894def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst), 3895 (ins VR128X:$src), 3896 "vmovq\t{$src, $dst|$dst, $src}", []>, 3897 EVEX, VEX_W, Sched<[SchedWriteVecLogic.XMM]>; 3898} // ExeDomain = SSEPackedInt 3899 3900def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}", 3901 (VMOVPQI2QIZrr VR128X:$dst, VR128X:$src), 0>; 3902 3903let Predicates = [HasAVX512] in { 3904 def : Pat<(X86vextractstore64 (v2i64 VR128X:$src), addr:$dst), 3905 (VMOVPQI2QIZmr addr:$dst, VR128X:$src)>; 3906} 3907 3908// Move Scalar Single to Double Int 3909// 3910let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in { 3911def VMOVSS2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), 3912 (ins FR32X:$src), 3913 "vmovd\t{$src, $dst|$dst, $src}", 3914 [(set GR32:$dst, (bitconvert FR32X:$src))]>, 3915 EVEX, Sched<[WriteVecMoveToGpr]>; 3916} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1 3917 3918// Move Quadword Int to Packed Quadword Int 3919// 3920let ExeDomain = SSEPackedInt in { 3921def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst), 3922 (ins i64mem:$src), 3923 "vmovq\t{$src, $dst|$dst, $src}", 3924 [(set VR128X:$dst, 3925 (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, 3926 EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>; 3927} // ExeDomain = SSEPackedInt 3928 3929// Allow "vmovd" but print "vmovq". 3930def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}", 3931 (VMOV64toPQIZrr VR128X:$dst, GR64:$src), 0>; 3932def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}", 3933 (VMOVPQIto64Zrr GR64:$dst, VR128X:$src), 0>; 3934 3935// Conversions between masks and scalar fp. 
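// There is no direct k-register <-> XMM copy, so these conversions bounce
// through a GPR: kmovd/kmovq out of the mask register followed by
// vmovd/vmovq into the FP register, and the reverse going the other way.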
// Conversions between masks and scalar fp.
def : Pat<(v32i1 (bitconvert FR32X:$src)),
          (KMOVDkr (VMOVSS2DIZrr FR32X:$src))>;
def : Pat<(f32 (bitconvert VK32:$src)),
          (VMOVDI2SSZrr (KMOVDrk VK32:$src))>;

def : Pat<(v64i1 (bitconvert FR64X:$src)),
          (KMOVQkr (VMOVSDto64Zrr FR64X:$src))>;
def : Pat<(f64 (bitconvert VK64:$src)),
          (VMOV64toSDZrr (KMOVQrk VK64:$src))>;

//===----------------------------------------------------------------------===//
// AVX-512 MOVSS, MOVSD
//===----------------------------------------------------------------------===//

multiclass avx512_move_scalar<string asm, SDNode OpNode, PatFrag vzload_frag,
                              X86VectorVTInfo _> {
  let Predicates = [HasAVX512, OptForSize] in
  def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
             (ins _.RC:$src1, _.RC:$src2),
             !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2)))],
             _.ExeDomain>, EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
  def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
               (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
               !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|",
                          "$dst {${mask}} {z}, $src1, $src2}"),
               [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
                                       (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                                       _.ImmAllZerosV)))],
               _.ExeDomain>, EVEX_4V, EVEX_KZ, Sched<[SchedWriteFShuffle.XMM]>;
  let Constraints = "$src0 = $dst" in
  def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
              (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
              !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|",
                         "$dst {${mask}}, $src1, $src2}"),
              [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
                                      (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                                      (_.VT _.RC:$src0))))],
              _.ExeDomain>, EVEX_4V, EVEX_K, Sched<[SchedWriteFShuffle.XMM]>;
  let canFoldAsLoad = 1, isReMaterializable = 1 in {
  def rm : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst), (ins _.ScalarMemOp:$src),
             !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
             [(set _.RC:$dst, (_.VT (vzload_frag addr:$src)))],
             _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
  // _alt version uses FR32/FR64 register class.
  let isCodeGenOnly = 1 in
  def rm_alt : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
                 !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
                 [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
                 _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
  }
  let mayLoad = 1, hasSideEffects = 0 in {
  let Constraints = "$src0 = $dst" in
  def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
              (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src),
              !strconcat(asm, "\t{$src, $dst {${mask}}|",
                         "$dst {${mask}}, $src}"),
              [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFLoad]>;
  def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
               (ins _.KRCWM:$mask, _.ScalarMemOp:$src),
               !strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
                          "$dst {${mask}} {z}, $src}"),
               [], _.ExeDomain>, EVEX, EVEX_KZ, Sched<[WriteFLoad]>;
  }
  def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src),
            !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
            [(store _.FRC:$src, addr:$dst)], _.ExeDomain>,
            EVEX, Sched<[WriteFStore]>;
  let mayStore = 1, hasSideEffects = 0 in
  def mrk: AVX512PI<0x11, MRMDestMem, (outs),
             (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.RC:$src),
             !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
             [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFStore]>,
             NotMemoryFoldable;
}

defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, X86vzload32, f32x_info>,
               VEX_LIG, XS, EVEX_CD8<32, CD8VT1>;

defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, X86vzload64, f64x_info>,
               VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>;


multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
                                       PatLeaf ZeroFP, X86VectorVTInfo _> {

def : Pat<(_.VT (OpNode _.RC:$src0,
                        (_.VT (scalar_to_vector
                                (_.EltVT (X86selects VK1WM:$mask,
                                                     (_.EltVT _.FRC:$src1),
                                                     (_.EltVT _.FRC:$src2))))))),
          (!cast<Instruction>(InstrStr#rrk)
                        (_.VT (COPY_TO_REGCLASS _.FRC:$src2, _.RC)),
                        VK1WM:$mask,
                        (_.VT _.RC:$src0),
                        (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;

def : Pat<(_.VT (OpNode _.RC:$src0,
                        (_.VT (scalar_to_vector
                                (_.EltVT (X86selects VK1WM:$mask,
                                                     (_.EltVT _.FRC:$src1),
                                                     (_.EltVT ZeroFP))))))),
          (!cast<Instruction>(InstrStr#rrkz)
                        VK1WM:$mask,
                        (_.VT _.RC:$src0),
                        (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
}

multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
                                        dag Mask, RegisterClass MaskRC> {

def : Pat<(masked_store
             (_.info512.VT (insert_subvector undef,
                                             (_.info128.VT _.info128.RC:$src),
                                             (iPTR 0))), addr:$dst, Mask),
          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
                      _.info128.RC:$src)>;

}

multiclass avx512_store_scalar_lowering_subreg<string InstrStr,
                                               AVX512VLVectorVTInfo _,
                                               dag Mask, RegisterClass MaskRC,
                                               SubRegIndex subreg> {

def : Pat<(masked_store
             (_.info512.VT (insert_subvector undef,
                                             (_.info128.VT _.info128.RC:$src),
                                             (iPTR 0))), addr:$dst, Mask),
          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      _.info128.RC:$src)>;

}
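
// Illustrative sketch (not part of the original source): the masked scalar
// store lowerings above correspond to the AVX512F masked scalar-store
// intrinsics such as _mm_mask_store_ss, where only bit 0 of the mask is used:
//   #include <immintrin.h>
//   void store_lane0(float *p, __mmask8 k, __m128 v) {
//     _mm_mask_store_ss(p, k, v);   // selects VMOVSSZmrk when masked
//   }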
// This matches the more recent codegen from clang that avoids emitting a
// 512-bit masked store directly. Codegen will widen a 128-bit masked store
// to 512 bits on AVX512F-only targets.
multiclass avx512_store_scalar_lowering_subreg2<string InstrStr,
                                                AVX512VLVectorVTInfo _,
                                                dag Mask512, dag Mask128,
                                                RegisterClass MaskRC,
                                                SubRegIndex subreg> {

// AVX512F pattern.
def : Pat<(masked_store
             (_.info512.VT (insert_subvector undef,
                                             (_.info128.VT _.info128.RC:$src),
                                             (iPTR 0))), addr:$dst, Mask512),
          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      _.info128.RC:$src)>;

// AVX512VL pattern.
def : Pat<(masked_store (_.info128.VT _.info128.RC:$src), addr:$dst, Mask128),
          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      _.info128.RC:$src)>;
}

multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
                                       dag Mask, RegisterClass MaskRC> {

def : Pat<(_.info128.VT (extract_subvector
                         (_.info512.VT (masked_load addr:$srcAddr, Mask,
                                                    _.info512.ImmAllZerosV)),
                         (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmkz)
                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
                      addr:$srcAddr)>;

def : Pat<(_.info128.VT (extract_subvector
                (_.info512.VT (masked_load addr:$srcAddr, Mask,
                      (_.info512.VT (insert_subvector undef,
                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
                            (iPTR 0))))),
                (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
                      addr:$srcAddr)>;

}

multiclass avx512_load_scalar_lowering_subreg<string InstrStr,
                                              AVX512VLVectorVTInfo _,
                                              dag Mask, RegisterClass MaskRC,
                                              SubRegIndex subreg> {

def : Pat<(_.info128.VT (extract_subvector
                         (_.info512.VT (masked_load addr:$srcAddr, Mask,
                                                    _.info512.ImmAllZerosV)),
                         (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmkz)
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

def : Pat<(_.info128.VT (extract_subvector
                (_.info512.VT (masked_load addr:$srcAddr, Mask,
                      (_.info512.VT (insert_subvector undef,
                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
                            (iPTR 0))))),
                (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

}
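
// Illustrative sketch (not part of the original source): the masked scalar
// load lowerings above correspond to the AVX512F intrinsics
// _mm_mask_load_ss / _mm_maskz_load_ss, which again use only bit 0 of the
// mask:
//   #include <immintrin.h>
//   __m128 load_merge(__m128 src, __mmask8 k, const float *p) {
//     return _mm_mask_load_ss(src, k, p);   // VMOVSSZrmk
//   }
//   __m128 load_zero(__mmask8 k, const float *p) {
//     return _mm_maskz_load_ss(k, p);       // VMOVSSZrmkz
//   }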
// This matches the more recent codegen from clang that avoids emitting a
// 512-bit masked load directly. Codegen will widen a 128-bit masked load
// to 512 bits on AVX512F-only targets.
multiclass avx512_load_scalar_lowering_subreg2<string InstrStr,
                                               AVX512VLVectorVTInfo _,
                                               dag Mask512, dag Mask128,
                                               RegisterClass MaskRC,
                                               SubRegIndex subreg> {
// AVX512F patterns.
def : Pat<(_.info128.VT (extract_subvector
                         (_.info512.VT (masked_load addr:$srcAddr, Mask512,
                                                    _.info512.ImmAllZerosV)),
                         (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmkz)
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

def : Pat<(_.info128.VT (extract_subvector
                (_.info512.VT (masked_load addr:$srcAddr, Mask512,
                      (_.info512.VT (insert_subvector undef,
                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
                            (iPTR 0))))),
                (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

// AVX512VL patterns.
def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
                                     _.info128.ImmAllZerosV)),
          (!cast<Instruction>(InstrStr#rmkz)
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
                         (_.info128.VT (X86vzmovl _.info128.RC:$src)))),
          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;
}

defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;

defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;

defm : avx512_store_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (insert_subvector
                           (v16i1 immAllZerosV),
                           (v4i1 (extract_subvector
                                  (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                                  (iPTR 0))),
                           (iPTR 0))),
                   (v4i1 (extract_subvector
                          (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                          (iPTR 0))), GR8, sub_8bit>;
defm : avx512_store_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1
                    (extract_subvector
                     (v16i1
                      (insert_subvector
                       (v16i1 immAllZerosV),
                       (v2i1 (extract_subvector
                              (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                              (iPTR 0))),
                       (iPTR 0))),
                     (iPTR 0))),
                   (v2i1 (extract_subvector
                          (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                          (iPTR 0))), GR8, sub_8bit>;

defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;

defm : avx512_load_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (insert_subvector
                           (v16i1 immAllZerosV),
                           (v4i1 (extract_subvector
                                  (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                                  (iPTR 0))),
                           (iPTR 0))),
                   (v4i1 (extract_subvector
                          (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                          (iPTR 0))), GR8, sub_8bit>;
defm : avx512_load_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1
                    (extract_subvector
                     (v16i1
                      (insert_subvector
                       (v16i1 immAllZerosV),
                       (v2i1 (extract_subvector
                              (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                              (iPTR 0))),
                       (iPTR 0))),
                     (iPTR 0))),
                   (v2i1 (extract_subvector
                          (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                          (iPTR 0))), GR8, sub_8bit>;

def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrk
                             (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
                             VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
                             (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;

def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), fp32imm0)),
          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrkz VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
                             (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;

def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), (f32 FR32X:$src0))),
          (COPY_TO_REGCLASS
           (v4f32 (VMOVSSZrmk (v4f32 (COPY_TO_REGCLASS FR32X:$src0, VR128X)),
                              VK1WM:$mask, addr:$src)),
           FR32X)>;
def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), fp32imm0)),
          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrmkz VK1WM:$mask, addr:$src)), FR32X)>;

def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrk
                             (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
                             VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
                             (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;

def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fp64imm0)),
          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrkz VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
                             (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;

def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), (f64 FR64X:$src0))),
          (COPY_TO_REGCLASS
           (v2f64 (VMOVSDZrmk (v2f64 (COPY_TO_REGCLASS FR64X:$src0, VR128X)),
                              VK1WM:$mask, addr:$src)),
           FR64X)>;
def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), fp64imm0)),
          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrmkz VK1WM:$mask, addr:$src)), FR64X)>;
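
// Illustrative sketch (not part of the original source): the X86selects
// patterns above and below implement masked scalar selects, as exposed by
// intrinsics such as _mm_mask_move_ss:
//   #include <immintrin.h>
//   __m128 sel(__m128 src, __mmask8 k, __m128 a, __m128 b) {
//     return _mm_mask_move_ss(src, k, a, b);   // VMOVSSZrrk
//   }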

def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 VR128X:$src2))),
          (VMOVSSZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 VR128X:$src2))),
          (VMOVSDZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;

def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 immAllZerosV))),
          (VMOVSSZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 immAllZerosV))),
          (VMOVSDZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;

let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
  def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                            (ins VR128X:$src1, VR128X:$src2),
                            "vmovss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                            []>, XS, EVEX_4V, VEX_LIG,
                            FoldGenData<"VMOVSSZrr">,
                            Sched<[SchedWriteFShuffle.XMM]>;

  let Constraints = "$src0 = $dst" in
  def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                             (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask,
                                  VR128X:$src1, VR128X:$src2),
                             "vmovss\t{$src2, $src1, $dst {${mask}}|"#
                               "$dst {${mask}}, $src1, $src2}",
                             []>, EVEX_K, XS, EVEX_4V, VEX_LIG,
                             FoldGenData<"VMOVSSZrrk">,
                             Sched<[SchedWriteFShuffle.XMM]>;

  def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                              (ins f32x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
                              "vmovss\t{$src2, $src1, $dst {${mask}} {z}|"#
                                "$dst {${mask}} {z}, $src1, $src2}",
                              []>, EVEX_KZ, XS, EVEX_4V, VEX_LIG,
                              FoldGenData<"VMOVSSZrrkz">,
                              Sched<[SchedWriteFShuffle.XMM]>;

  def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                            (ins VR128X:$src1, VR128X:$src2),
                            "vmovsd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                            []>, XD, EVEX_4V, VEX_LIG, VEX_W,
                            FoldGenData<"VMOVSDZrr">,
                            Sched<[SchedWriteFShuffle.XMM]>;

  let Constraints = "$src0 = $dst" in
  def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                             (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask,
                                  VR128X:$src1, VR128X:$src2),
                             "vmovsd\t{$src2, $src1, $dst {${mask}}|"#
                               "$dst {${mask}}, $src1, $src2}",
                             []>, EVEX_K, XD, EVEX_4V, VEX_LIG,
                             VEX_W, FoldGenData<"VMOVSDZrrk">,
                             Sched<[SchedWriteFShuffle.XMM]>;

  def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                              (ins f64x_info.KRCWM:$mask, VR128X:$src1,
                                   VR128X:$src2),
                              "vmovsd\t{$src2, $src1, $dst {${mask}} {z}|"#
                                "$dst {${mask}} {z}, $src1, $src2}",
                              []>, EVEX_KZ, XD, EVEX_4V, VEX_LIG,
                              VEX_W, FoldGenData<"VMOVSDZrrkz">,
                              Sched<[SchedWriteFShuffle.XMM]>;
}

def : InstAlias<"vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VMOVSSZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}}|"#
                  "$dst {${mask}}, $src1, $src2}",
                (VMOVSSZrrk_REV VR128X:$dst, VK1WM:$mask,
                                VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"#
                  "$dst {${mask}} {z}, $src1, $src2}",
                (VMOVSSZrrkz_REV VR128X:$dst, VK1WM:$mask,
                                 VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VMOVSDZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}}|"#
                  "$dst {${mask}}, $src1, $src2}",
                (VMOVSDZrrk_REV VR128X:$dst, VK1WM:$mask,
                                VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"#
                  "$dst {${mask}} {z}, $src1, $src2}",
                (VMOVSDZrrkz_REV VR128X:$dst, VK1WM:$mask,
                                 VR128X:$src1, VR128X:$src2), 0>;

let Predicates = [HasAVX512, OptForSize] in {
  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
            (VMOVSSZrr (v4f32 (AVX512_128_SET0)), VR128X:$src)>;
  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
            (VMOVSSZrr (v4i32 (AVX512_128_SET0)), VR128X:$src)>;

  // Move low f32 and clear high bits.
  def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
              (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))), sub_xmm)>;
  def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
              (v4i32 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)))), sub_xmm)>;

  def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
              (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))), sub_xmm)>;
  def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
              (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)))), sub_xmm)>;
}

// Use 128-bit blends for OptForSpeed since BLENDs have better throughput than
// VMOVSS/SD. Unfortunately, this loses the ability to use XMM16-31.
let Predicates = [HasAVX512, OptForSpeed] in {
  def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VBLENDPSrri (v4f32 (V_SET0)),
                     (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)),
                     (i8 1))), sub_xmm)>;
  def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VPBLENDWrri (v4i32 (V_SET0)),
                     (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)),
                     (i8 3))), sub_xmm)>;
}

let Predicates = [HasAVX512] in {
  def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
            (VMOVSSZrm addr:$src)>;
  def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
            (VMOVSDZrm addr:$src)>;

  // Represent the same patterns above but in the form they appear for
  // 256-bit types
  def : Pat<(v8f32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v4f64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;

  // Represent the same patterns above but in the form they appear for
  // 512-bit types
  def : Pat<(v16f32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v8f64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
}

let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
                                  (ins VR128X:$src),
                                  "vmovq\t{$src, $dst|$dst, $src}",
                                  [(set VR128X:$dst, (v2i64 (X86vzmovl
                                                     (v2i64 VR128X:$src))))]>,
                                  EVEX, VEX_W;
}

let Predicates = [HasAVX512] in {
  def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
            (VMOVDI2PDIZrr GR32:$src)>;

  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
            (VMOV64toPQIZrr GR64:$src)>;

  // AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
  def : Pat<(v4i32 (X86vzload32 addr:$src)),
            (VMOVDI2PDIZrm addr:$src)>;
  def : Pat<(v8i32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
  def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
            (VMOVZPQILo2PQIZrr VR128X:$src)>;
  def : Pat<(v2i64 (X86vzload64 addr:$src)),
            (VMOVQI2PQIZrm addr:$src)>;
  def : Pat<(v4i64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;

  // Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
  def : Pat<(v16i32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
  def : Pat<(v8i64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;

  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2f64 (VMOVZPQILo2PQIZrr
                     (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))),
             sub_xmm)>;
  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2i64 (VMOVZPQILo2PQIZrr
                     (v2i64 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)))),
             sub_xmm)>;

  def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2f64 (VMOVZPQILo2PQIZrr
                     (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))),
             sub_xmm)>;
  def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2i64 (VMOVZPQILo2PQIZrr
                     (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)))),
             sub_xmm)>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - Non-temporals
//===----------------------------------------------------------------------===//

def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
                     (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
                     [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.ZMM.RM]>,
                     EVEX, T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>;

let Predicates = [HasVLX] in {
  def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
                          (ins i256mem:$src),
                          "vmovntdqa\t{$src, $dst|$dst, $src}",
                          [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.YMM.RM]>,
                          EVEX, T8PD, EVEX_V256, EVEX_CD8<64, CD8VF>;

  def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
                          (ins i128mem:$src),
                          "vmovntdqa\t{$src, $dst|$dst, $src}",
                          [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.XMM.RM]>,
                          EVEX, T8PD, EVEX_V128, EVEX_CD8<64, CD8VF>;
}

multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                        X86SchedWriteMoveLS Sched,
                        PatFrag st_frag = alignednontemporalstore> {
  let SchedRW = [Sched.MR], AddedComplexity = 400 in
  def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(st_frag (_.VT _.RC:$src), addr:$dst)],
                    _.ExeDomain>, EVEX, EVEX_CD8<_.EltSize, CD8VF>;
}

multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
                           AVX512VLVectorVTInfo VTInfo,
                           X86SchedWriteMoveLSWidths Sched> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512, Sched.ZMM>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256, Sched.YMM>, EVEX_V256;
    defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128,
                             Sched.XMM>, EVEX_V128;
  }
}

defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info,
                                SchedWriteVecMoveLSNT>, PD;
defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info,
                                SchedWriteFMoveLSNT>, PD, VEX_W;
defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info,
                                SchedWriteFMoveLSNT>, PS;

let Predicates = [HasAVX512], AddedComplexity = 400 in {
  def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
  def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
  def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;

  def : Pat<(v8f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v16f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v8i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v16i32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v32i16 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v64i8 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
}

let Predicates = [HasVLX], AddedComplexity = 400 in {
  def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignednontemporalstore (v16i16 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignednontemporalstore (v32i8 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;

  def : Pat<(v4f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v8f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v4i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v8i32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v16i16 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v32i8 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;

  def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignednontemporalstore (v8i16 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignednontemporalstore (v16i8 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;

  def : Pat<(v2f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v4f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v2i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v4i32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v8i16 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v16i8 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
}
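
// Illustrative sketch (not part of the original source): the non-temporal
// patterns above are what the streaming intrinsics lower to, e.g.
// _mm512_stream_si512 (VMOVNTDQZmr) and _mm512_stream_load_si512
// (VMOVNTDQAZrm); both require 64-byte-aligned addresses:
//   #include <immintrin.h>
//   void nt_copy(__m512i *dst, const __m512i *src) {
//     _mm512_stream_si512(dst, _mm512_stream_load_si512((void *)src));
//   }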
//===----------------------------------------------------------------------===//
// AVX-512 - Integer arithmetic
//
multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86VectorVTInfo _, X86FoldableSchedWrite sched,
                           bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                    "$src2, $src1", "$src1, $src2",
                    (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                    IsCommutable, IsCommutable>, AVX512BIBase, EVEX_4V,
                    Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                    "$src2, $src1", "$src1, $src2",
                    (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2)))>,
                    AVX512BIBase, EVEX_4V,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
                            bit IsCommutable = 0> :
           avx512_binop_rm<opc, OpcodeStr, OpNode, _, sched, IsCommutable> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                    "${src2}"#_.BroadcastStr#", $src1",
                    "$src1, ${src2}"#_.BroadcastStr,
                    (_.VT (OpNode _.RC:$src1,
                           (_.BroadcastLdFrag addr:$src2)))>,
                    AVX512BIBase, EVEX_4V, EVEX_B,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              AVX512VLVectorVTInfo VTInfo,
                              X86SchedWriteWidths sched, Predicate prd,
                              bit IsCommutable = 0> {
  let Predicates = [prd] in
  defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
                           IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256,
                                sched.YMM, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128,
                                sched.XMM, IsCommutable>, EVEX_V128;
  }
}

multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               AVX512VLVectorVTInfo VTInfo,
                               X86SchedWriteWidths sched, Predicate prd,
                               bit IsCommutable = 0> {
  let Predicates = [prd] in
  defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
                            IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256,
                                 sched.YMM, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128,
                                 sched.XMM, IsCommutable>, EVEX_V128;
  }
}

multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
                                  sched, prd, IsCommutable>,
              VEX_W, EVEX_CD8<64, CD8VF>;
}

multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
                                  sched, prd, IsCommutable>, EVEX_CD8<32, CD8VF>;
}

multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info,
                                 sched, prd, IsCommutable>, EVEX_CD8<16, CD8VF>,
              VEX_WIG;
}

multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME :
              avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info,
                                 sched, prd, IsCommutable>, EVEX_CD8<8, CD8VF>,
              VEX_WIG;
}

multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd, bit IsCommutable = 0> {
  defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, sched, prd,
                                IsCommutable>;

  defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, sched, prd,
                                IsCommutable>;
}

multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd, bit IsCommutable = 0> {
  defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, sched, prd,
                                IsCommutable>;

  defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode, sched, prd,
                                IsCommutable>;
}

multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
                                  bits<8> opc_d, bits<8> opc_q,
                                  string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched,
                                  bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
                                    sched, HasAVX512, IsCommutable>,
              avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
                                    sched, HasBWI, IsCommutable>;
}

multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched,
                            SDNode OpNode,X86VectorVTInfo _Src,
                            X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct,
                            bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
                            "$src2, $src1","$src1, $src2",
                            (_Dst.VT (OpNode
                                      (_Src.VT _Src.RC:$src1),
                                      (_Src.VT _Src.RC:$src2))),
                            IsCommutable>,
                            AVX512BIBase, EVEX_4V, Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                          (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
                          "$src2, $src1", "$src1, $src2",
                          (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
                                           (_Src.LdFrag addr:$src2)))>,
                          AVX512BIBase, EVEX_4V,
                          Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                       (ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
                       OpcodeStr,
                       "${src2}"#_Brdct.BroadcastStr#", $src1",
                       "$src1, ${src2}"#_Brdct.BroadcastStr,
                       (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
                                 (_Brdct.VT (_Brdct.BroadcastLdFrag addr:$src2)))))>,
                       AVX512BIBase, EVEX_4V, EVEX_B,
                       Sched<[sched.Folded, sched.ReadAfterFold]>;
}

defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
                                    SchedWriteVecALU, 1>;
defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
                                    SchedWriteVecALU, 0>;
defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", saddsat,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", ssubsat,
                                    SchedWriteVecALU, HasBWI, 0>;
defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", uaddsat,
                                     SchedWriteVecALU, HasBWI, 1>;
defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", usubsat,
                                     SchedWriteVecALU, HasBWI, 0>;
defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
                                    SchedWritePMULLD, HasAVX512, 1>, T8PD;
defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
                                    SchedWriteVecIMul, HasBWI, 1>;
defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
                                    SchedWriteVecIMul, HasDQI, 1>, T8PD,
                                    NotEVEX2VEXConvertible;
defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SchedWriteVecIMul,
                                    HasBWI, 1>;
defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SchedWriteVecIMul,
                                     HasBWI, 1>;
defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs,
                                      SchedWriteVecIMul, HasBWI, 1>, T8PD;
defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", X86avg,
                                   SchedWriteVecALU, HasBWI, 1>;
defm VPMULDQ : avx512_binop_rm_vl_q<0x28, "vpmuldq", X86pmuldq,
                                    SchedWriteVecIMul, HasAVX512, 1>, T8PD;
defm VPMULUDQ : avx512_binop_rm_vl_q<0xF4, "vpmuludq", X86pmuludq,
                                     SchedWriteVecIMul, HasAVX512, 1>;
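
// Illustrative sketch (not part of the original source): the defms above
// expand into the usual EVEX integer arithmetic, reachable from intrinsics
// such as:
//   #include <immintrin.h>
//   __m512i add32(__m512i a, __m512i b) { return _mm512_add_epi32(a, b); }   // VPADDD
//   __m512i sat8(__m512i a, __m512i b)  { return _mm512_adds_epi8(a, b); }   // VPADDSB (BWI)
//   __m512i mul64(__m512i a, __m512i b) { return _mm512_mullo_epi64(a, b); } // VPMULLQ (DQI)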
multiclass avx512_binop_all<bits<8> opc, string OpcodeStr,
                            X86SchedWriteWidths sched,
                            AVX512VLVectorVTInfo _SrcVTInfo,
                            AVX512VLVectorVTInfo _DstVTInfo,
                            SDNode OpNode, Predicate prd, bit IsCommutable = 0> {
  let Predicates = [prd] in
  defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
                                 _SrcVTInfo.info512, _DstVTInfo.info512,
                                 v8i64_info, IsCommutable>,
                EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
  let Predicates = [HasVLX, prd] in {
    defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
                                      _SrcVTInfo.info256, _DstVTInfo.info256,
                                      v4i64x_info, IsCommutable>,
                     EVEX_V256, EVEX_CD8<64, CD8VF>, VEX_W;
    defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
                                      _SrcVTInfo.info128, _DstVTInfo.info128,
                                      v2i64x_info, IsCommutable>,
                     EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_W;
  }
}

defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SchedWriteVecALU,
                                       avx512vl_i8_info, avx512vl_i8_info,
                                       X86multishift, HasVBMI, 0>, T8PD;

multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86VectorVTInfo _Src, X86VectorVTInfo _Dst,
                            X86FoldableSchedWrite sched> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                       (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
                       OpcodeStr,
                       "${src2}"#_Src.BroadcastStr#", $src1",
                       "$src1, ${src2}"#_Src.BroadcastStr,
                       (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
                                 (_Src.VT (_Src.BroadcastLdFrag addr:$src2)))))>,
                       EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>,
                       Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
                           SDNode OpNode,X86VectorVTInfo _Src,
                           X86VectorVTInfo _Dst, X86FoldableSchedWrite sched,
                           bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
                            "$src2, $src1","$src1, $src2",
                            (_Dst.VT (OpNode
                                      (_Src.VT _Src.RC:$src1),
                                      (_Src.VT _Src.RC:$src2))),
                            IsCommutable, IsCommutable>,
                            EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V, Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                          (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
                          "$src2, $src1", "$src1, $src2",
                          (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
                                           (_Src.LdFrag addr:$src2)))>,
                          EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>,
                          Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode> {
  let Predicates = [HasBWI] in
  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i32_info,
                                v32i16_info, SchedWriteShuffle.ZMM>,
                avx512_packs_rmb<opc,
                                 OpcodeStr, OpNode, v16i32_info,
                                 v32i16_info, SchedWriteShuffle.ZMM>, EVEX_V512;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i32x_info,
                                     v16i16x_info, SchedWriteShuffle.YMM>,
                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v8i32x_info,
                                      v16i16x_info, SchedWriteShuffle.YMM>,
                     EVEX_V256;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v4i32x_info,
                                     v8i16x_info, SchedWriteShuffle.XMM>,
                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v4i32x_info,
                                      v8i16x_info, SchedWriteShuffle.XMM>,
                     EVEX_V128;
  }
}
multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
                                   SDNode OpNode> {
  let Predicates = [HasBWI] in
  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info, v64i8_info,
                                SchedWriteShuffle.ZMM>, EVEX_V512, VEX_WIG;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info,
                                     v32i8x_info, SchedWriteShuffle.YMM>,
                     EVEX_V256, VEX_WIG;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info,
                                     v16i8x_info, SchedWriteShuffle.XMM>,
                     EVEX_V128, VEX_WIG;
  }
}

multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr,
                         SDNode OpNode, AVX512VLVectorVTInfo _Src,
                         AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> {
  let Predicates = [HasBWI] in
  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512,
                                _Dst.info512, SchedWriteVecIMul.ZMM,
                                IsCommutable>, EVEX_V512;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256,
                                     _Dst.info256, SchedWriteVecIMul.YMM,
                                     IsCommutable>, EVEX_V256;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128,
                                     _Dst.info128, SchedWriteVecIMul.XMM,
                                     IsCommutable>, EVEX_V128;
  }
}

defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, AVX512BIBase;
defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, AVX5128IBase;
defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase;
defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase;

defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
                                avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8PD, VEX_WIG;
defm VPMADDWD : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
                              avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase, VEX_WIG;
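
// Illustrative sketch (not part of the original source): VPMADDWD multiplies
// adjacent i16 pairs and sums each pair into an i32 lane, i.e. per i32 lane
// dst = a_lo * b_lo + a_hi * b_hi, as exposed by _mm512_madd_epi16:
//   #include <immintrin.h>
//   __m512i dot16(__m512i a, __m512i b) {
//     return _mm512_madd_epi16(a, b);   // VPMADDWD (BWI at 512 bits)
//   }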
defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax,
                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMAXSD : avx512_binop_rm_vl_d<0x3D, "vpmaxsd", smax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
defm VPMAXSQ : avx512_binop_rm_vl_q<0x3D, "vpmaxsq", smax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
                                    NotEVEX2VEXConvertible;

defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax,
                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMAXUD : avx512_binop_rm_vl_d<0x3F, "vpmaxud", umax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
defm VPMAXUQ : avx512_binop_rm_vl_q<0x3F, "vpmaxuq", umax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
                                    NotEVEX2VEXConvertible;

defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin,
                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMINSD : avx512_binop_rm_vl_d<0x39, "vpminsd", smin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
defm VPMINSQ : avx512_binop_rm_vl_q<0x39, "vpminsq", smin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
                                    NotEVEX2VEXConvertible;

defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin,
                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMINUD : avx512_binop_rm_vl_d<0x3B, "vpminud", umin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
defm VPMINUQ : avx512_binop_rm_vl_q<0x3B, "vpminuq", umin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
                                    NotEVEX2VEXConvertible;

// PMULLQ: Use the 512-bit version to implement the 128/256-bit versions when
// VLX is unavailable.
let Predicates = [HasDQI, NoVLX] in {
  def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG
                (VPMULLQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
             sub_ymm)>;
  def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
            (EXTRACT_SUBREG
                (VPMULLQZrmb
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    addr:$src2),
             sub_ymm)>;

  def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG
                (VPMULLQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
             sub_xmm)>;
  def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
            (EXTRACT_SUBREG
                (VPMULLQZrmb
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    addr:$src2),
             sub_xmm)>;
}

multiclass avx512_min_max_lowering<string Instr, SDNode OpNode> {
  def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)),
            (EXTRACT_SUBREG
                (!cast<Instruction>(Instr#"rr")
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
             sub_ymm)>;
  def : Pat<(v4i64 (OpNode (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
            (EXTRACT_SUBREG
                (!cast<Instruction>(Instr#"rmb")
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    addr:$src2),
             sub_ymm)>;

  def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)),
            (EXTRACT_SUBREG
                (!cast<Instruction>(Instr#"rr")
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
             sub_xmm)>;
  def : Pat<(v2i64 (OpNode (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
            (EXTRACT_SUBREG
                (!cast<Instruction>(Instr#"rmb")
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    addr:$src2),
             sub_xmm)>;
}

let Predicates = [HasAVX512, NoVLX] in {
  defm : avx512_min_max_lowering<"VPMAXUQZ", umax>;
  defm : avx512_min_max_lowering<"VPMINUQZ", umin>;
  defm : avx512_min_max_lowering<"VPMAXSQZ", smax>;
  defm : avx512_min_max_lowering<"VPMINSQZ", smin>;
}
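
// Illustrative sketch (not part of the original source): the NoVLX lowerings
// above widen a 128/256-bit op to 512 bits and extract the low subregister.
// With clang vector extensions (a hypothetical helper; any v4i64 smax source
// works), an AVX512F-only target performs the v4i64 max as a v8i64 VPMAXSQ:
//   typedef long long v4i64 __attribute__((vector_size(32)));
//   v4i64 max4(v4i64 a, v4i64 b) {
//     return __builtin_elementwise_max(a, b);
//   }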

//===----------------------------------------------------------------------===//
// AVX-512 Logical Instructions
//===----------------------------------------------------------------------===//

defm VPAND : avx512_binop_rm_vl_dq<0xDB, 0xDB, "vpand", and,
                                   SchedWriteVecLogic, HasAVX512, 1>;
defm VPOR : avx512_binop_rm_vl_dq<0xEB, 0xEB, "vpor", or,
                                  SchedWriteVecLogic, HasAVX512, 1>;
defm VPXOR : avx512_binop_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
                                   SchedWriteVecLogic, HasAVX512, 1>;
defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
                                    SchedWriteVecLogic, HasAVX512>;

let Predicates = [HasVLX] in {
  def : Pat<(v16i8 (and VR128X:$src1, VR128X:$src2)),
            (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (and VR128X:$src1, VR128X:$src2)),
            (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(v16i8 (or VR128X:$src1, VR128X:$src2)),
            (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (or VR128X:$src1, VR128X:$src2)),
            (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(v16i8 (xor VR128X:$src1, VR128X:$src2)),
            (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (xor VR128X:$src1, VR128X:$src2)),
            (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(v16i8 (X86andnp VR128X:$src1, VR128X:$src2)),
            (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (X86andnp VR128X:$src1, VR128X:$src2)),
            (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(and VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(and VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPANDQZ128rm VR128X:$src1, addr:$src2)>;

  def : Pat<(or VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPORQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(or VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPORQZ128rm VR128X:$src1, addr:$src2)>;

  def : Pat<(xor VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(xor VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPXORQZ128rm VR128X:$src1, addr:$src2)>;

  def : Pat<(X86andnp VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;

  def : Pat<(v32i8 (and VR256X:$src1, VR256X:$src2)),
            (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (and VR256X:$src1, VR256X:$src2)),
            (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;

  def : Pat<(v32i8 (or VR256X:$src1, VR256X:$src2)),
            (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (or VR256X:$src1, VR256X:$src2)),
            (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;

  def : Pat<(v32i8 (xor VR256X:$src1, VR256X:$src2)),
            (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (xor VR256X:$src1, VR256X:$src2)),
            (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;

  def : Pat<(v32i8 (X86andnp VR256X:$src1, VR256X:$src2)),
            (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (X86andnp VR256X:$src1, VR256X:$src2)),
            (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;

  def : Pat<(and VR256X:$src1, (loadv32i8 addr:$src2)),
            (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(and VR256X:$src1, (loadv16i16 addr:$src2)),
            (VPANDQZ256rm VR256X:$src1, addr:$src2)>;

  def : Pat<(or VR256X:$src1, (loadv32i8 addr:$src2)),
            (VPORQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(or VR256X:$src1, (loadv16i16
             addr:$src2)),
            (VPORQZ256rm VR256X:$src1, addr:$src2)>;

  def : Pat<(xor VR256X:$src1, (loadv32i8 addr:$src2)),
            (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(xor VR256X:$src1, (loadv16i16 addr:$src2)),
            (VPXORQZ256rm VR256X:$src1, addr:$src2)>;

  def : Pat<(X86andnp VR256X:$src1, (loadv32i8 addr:$src2)),
            (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR256X:$src1, (loadv16i16 addr:$src2)),
            (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
}

let Predicates = [HasAVX512] in {
  def : Pat<(v64i8 (and VR512:$src1, VR512:$src2)),
            (VPANDQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v32i16 (and VR512:$src1, VR512:$src2)),
            (VPANDQZrr VR512:$src1, VR512:$src2)>;

  def : Pat<(v64i8 (or VR512:$src1, VR512:$src2)),
            (VPORQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v32i16 (or VR512:$src1, VR512:$src2)),
            (VPORQZrr VR512:$src1, VR512:$src2)>;

  def : Pat<(v64i8 (xor VR512:$src1, VR512:$src2)),
            (VPXORQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v32i16 (xor VR512:$src1, VR512:$src2)),
            (VPXORQZrr VR512:$src1, VR512:$src2)>;

  def : Pat<(v64i8 (X86andnp VR512:$src1, VR512:$src2)),
            (VPANDNQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v32i16 (X86andnp VR512:$src1, VR512:$src2)),
            (VPANDNQZrr VR512:$src1, VR512:$src2)>;

  def : Pat<(and VR512:$src1, (loadv64i8 addr:$src2)),
            (VPANDQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(and VR512:$src1, (loadv32i16 addr:$src2)),
            (VPANDQZrm VR512:$src1, addr:$src2)>;

  def : Pat<(or VR512:$src1, (loadv64i8 addr:$src2)),
            (VPORQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(or VR512:$src1, (loadv32i16 addr:$src2)),
            (VPORQZrm VR512:$src1, addr:$src2)>;

  def : Pat<(xor VR512:$src1, (loadv64i8 addr:$src2)),
            (VPXORQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(xor VR512:$src1, (loadv32i16 addr:$src2)),
            (VPXORQZrm VR512:$src1, addr:$src2)>;

  def : Pat<(X86andnp VR512:$src1, (loadv64i8 addr:$src2)),
            (VPANDNQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR512:$src1, (loadv32i16 addr:$src2)),
            (VPANDNQZrm VR512:$src1, addr:$src2)>;
}
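
// Illustrative sketch (not part of the original source): there are no
// byte/word encodings of these logical ops (no vpandb/vpandw), which is why
// the patterns above reuse the 64-bit-element forms; an AND over epi8 data
// still selects VPANDQ/VPANDD since element size is irrelevant for bitwise
// ops:
//   #include <immintrin.h>
//   __m512i and_bytes(__m512i a, __m512i b) {
//     return _mm512_and_si512(a, b);   // vpandq/vpandd
//   }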

// Patterns to catch a vselect whose type differs from that of the logic op.
multiclass avx512_logical_lowering<string InstrStr, SDNode OpNode,
                                   X86VectorVTInfo _,
                                   X86VectorVTInfo IntInfo> {
  // Masked register-register logical operations.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
             _.RC:$src1, _.RC:$src2)>;

  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
             _.RC:$src2)>;

  // Masked register-memory logical operations.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
                                            (load addr:$src2)))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
                                              _.RC:$src1, addr:$src2)>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
                                            (load addr:$src2)))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1,
             addr:$src2)>;
}

multiclass avx512_logical_lowering_bcast<string InstrStr, SDNode OpNode,
                                         X86VectorVTInfo _,
                                         X86VectorVTInfo IntInfo> {
  // Register-broadcast logical operations.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert
                    (IntInfo.VT (OpNode _.RC:$src1,
                                 (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
                                               _.RC:$src1, addr:$src2)>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert
                    (IntInfo.VT (OpNode _.RC:$src1,
                                 (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rmbkz) _.KRCWM:$mask,
                                                _.RC:$src1, addr:$src2)>;
}

multiclass avx512_logical_lowering_sizes<string InstrStr, SDNode OpNode,
                                         AVX512VLVectorVTInfo SelectInfo,
                                         AVX512VLVectorVTInfo IntInfo> {
let Predicates = [HasVLX] in {
  defm : avx512_logical_lowering<InstrStr#"Z128", OpNode, SelectInfo.info128,
                                 IntInfo.info128>;
  defm : avx512_logical_lowering<InstrStr#"Z256", OpNode, SelectInfo.info256,
                                 IntInfo.info256>;
}
let Predicates = [HasAVX512] in {
  defm : avx512_logical_lowering<InstrStr#"Z", OpNode, SelectInfo.info512,
                                 IntInfo.info512>;
}
}

multiclass avx512_logical_lowering_sizes_bcast<string InstrStr, SDNode OpNode,
                                               AVX512VLVectorVTInfo SelectInfo,
                                               AVX512VLVectorVTInfo IntInfo> {
let Predicates = [HasVLX] in {
  defm : avx512_logical_lowering_bcast<InstrStr#"Z128", OpNode,
                                       SelectInfo.info128, IntInfo.info128>;
  defm : avx512_logical_lowering_bcast<InstrStr#"Z256", OpNode,
                                       SelectInfo.info256, IntInfo.info256>;
}
let Predicates = [HasAVX512] in {
  defm : avx512_logical_lowering_bcast<InstrStr#"Z", OpNode,
                                       SelectInfo.info512, IntInfo.info512>;
}
}

multiclass avx512_logical_lowering_types<string InstrStr, SDNode OpNode> {
  // i64 vselect with i32/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
                                       avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
                                       avx512vl_i8_info>;

  // i32 vselect with i64/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
                                       avx512vl_i64_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
                                       avx512vl_i8_info>;

  // f32 vselect with i64/i32/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i64_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i8_info>;

  // f64 vselect with i64/i32/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i64_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i8_info>;

  defm : avx512_logical_lowering_sizes_bcast<InstrStr#"D", OpNode,
                                             avx512vl_f32_info,
                                             avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes_bcast<InstrStr#"Q", OpNode,
                                             avx512vl_f64_info,
                                             avx512vl_i64_info>;
}

defm : avx512_logical_lowering_types<"VPAND", and>;
defm : avx512_logical_lowering_types<"VPOR", or>;
defm : avx512_logical_lowering_types<"VPXOR", xor>;
defm : avx512_logical_lowering_types<"VPANDN", X86andnp>;
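
// Illustrative sketch (not part of the original source): these lowerings
// fire when the vselect element type and the logic element type disagree,
// e.g. a float-masked bitwise AND performed as an integer op, where the
// mask can fold into the masked VPANDD/VPANDQ:
//   #include <immintrin.h>
//   __m512 masked_and(__m512 src, __mmask16 k, __m512i a, __m512i b) {
//     __m512 r = _mm512_castsi512_ps(_mm512_and_si512(a, b));
//     return _mm512_mask_mov_ps(src, k, r);
//   }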
AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 5353 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 5354 "$src2, $src1", "$src1, $src2", 5355 (_.VT (VecNode _.RC:$src1, _.RC:$src2))>, 5356 Sched<[sched]>, SIMD_EXC; 5357 5358 defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), 5359 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr, 5360 "$src2, $src1", "$src1, $src2", 5361 (_.VT (VecNode _.RC:$src1, 5362 (_.ScalarIntMemFrags addr:$src2)))>, 5363 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; 5364 5365 let isCodeGenOnly = 1, Predicates = [HasAVX512], 5366 Uses = [MXCSR], mayRaiseFPException = 1 in { 5367 def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst), 5368 (ins _.FRC:$src1, _.FRC:$src2), 5369 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", 5370 [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>, 5371 Sched<[sched]>, 5372 EVEX2VEXOverride<EVEX2VexOvrd#"rr"> { 5373 let isCommutable = IsCommutable; 5374 } 5375 def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst), 5376 (ins _.FRC:$src1, _.ScalarMemOp:$src2), 5377 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", 5378 [(set _.FRC:$dst, (OpNode _.FRC:$src1, 5379 (_.ScalarLdFrag addr:$src2)))]>, 5380 Sched<[sched.Folded, sched.ReadAfterFold]>, 5381 EVEX2VEXOverride<EVEX2VexOvrd#"rm">; 5382 } 5383 5384 let Uses = [MXCSR] in 5385 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 5386 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 5387 "{sae}, $src2, $src1", "$src1, $src2, {sae}", 5388 (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>, 5389 EVEX_B, Sched<[sched]>; 5390 } 5391} 5392 5393multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDNode OpNode, 5394 SDNode VecNode, SDNode RndNode, 5395 X86SchedWriteSizes sched, bit IsCommutable> { 5396 defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode, 5397 sched.PS.Scl, IsCommutable>, 5398 avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, RndNode, 5399 sched.PS.Scl, IsCommutable>, 5400 XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>; 5401 defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode, 5402 sched.PD.Scl, IsCommutable>, 5403 avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, RndNode, 5404 sched.PD.Scl, IsCommutable>, 5405 XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>; 5406} 5407 5408multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode, 5409 SDNode VecNode, SDNode SaeNode, 5410 X86SchedWriteSizes sched, bit IsCommutable> { 5411 defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode, 5412 VecNode, SaeNode, sched.PS.Scl, IsCommutable, 5413 NAME#"SS">, 5414 XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>; 5415 defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode, 5416 VecNode, SaeNode, sched.PD.Scl, IsCommutable, 5417 NAME#"SD">, 5418 XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>; 5419} 5420defm VADD : avx512_binop_s_round<0x58, "vadd", any_fadd, X86fadds, X86faddRnds, 5421 SchedWriteFAddSizes, 1>; 5422defm VMUL : avx512_binop_s_round<0x59, "vmul", any_fmul, X86fmuls, X86fmulRnds, 5423 SchedWriteFMulSizes, 1>; 5424defm VSUB : avx512_binop_s_round<0x5C, "vsub", any_fsub, X86fsubs, X86fsubRnds, 5425 SchedWriteFAddSizes, 0>; 5426defm VDIV : avx512_binop_s_round<0x5E, "vdiv", any_fdiv, X86fdivs, X86fdivRnds, 5427 SchedWriteFDivSizes, 0>; 5428defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminSAEs, 5429 SchedWriteFCmpSizes, 0>; 5430defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxSAEs, 5431 
                              SchedWriteFCmpSizes, 0>;

// MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use
// X86fminc and X86fmaxc instead of X86fmin and X86fmax.
multiclass avx512_commutable_binop_s<bits<8> opc, string OpcodeStr,
                                     X86VectorVTInfo _, SDNode OpNode,
                                     X86FoldableSchedWrite sched,
                                     string EVEX2VEXOvrd> {
  let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
              (ins _.FRC:$src1, _.FRC:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
              Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr"> {
    let isCommutable = 1;
  }
  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
              (ins _.FRC:$src1, _.ScalarMemOp:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                                        (_.ScalarLdFrag addr:$src2)))]>,
              Sched<[sched.Folded, sched.ReadAfterFold]>,
              EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
  }
}
defm VMINCSSZ : avx512_commutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
                                          SchedWriteFCmp.Scl, "VMINCSS">, XS,
                                          EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>,
                                          SIMD_EXC;

defm VMINCSDZ : avx512_commutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
                                          SchedWriteFCmp.Scl, "VMINCSD">, XD,
                                          VEX_W, EVEX_4V, VEX_LIG,
                                          EVEX_CD8<64, CD8VT1>, SIMD_EXC;

defm VMAXCSSZ : avx512_commutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
                                          SchedWriteFCmp.Scl, "VMAXCSS">, XS,
                                          EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>,
                                          SIMD_EXC;

defm VMAXCSDZ : avx512_commutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
                                          SchedWriteFCmp.Scl, "VMAXCSD">, XD,
                                          VEX_W, EVEX_4V, VEX_LIG,
                                          EVEX_CD8<64, CD8VT1>, SIMD_EXC;

multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                            SDPatternOperator MaskOpNode,
                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
                            bit IsCommutable,
                            bit IsKCommutable = IsCommutable> {
  let ExeDomain = _.ExeDomain, hasSideEffects = 0,
      Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm rr: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                  (_.VT (MaskOpNode _.RC:$src1, _.RC:$src2)), IsCommutable,
                  IsKCommutable, IsKCommutable>, EVEX_4V, Sched<[sched]>;
  let mayLoad = 1 in {
    defm rm: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#_.Suffix,
                    "$src2, $src1", "$src1, $src2",
                    (OpNode _.RC:$src1, (_.LdFrag addr:$src2)),
                    (MaskOpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
                    EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
    defm rmb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#_.Suffix,
                    "${src2}"#_.BroadcastStr#", $src1",
                    "$src1, ${src2}"#_.BroadcastStr,
                    (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))),
                    (MaskOpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>,
                    EVEX_4V, EVEX_B,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
  }
}

multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr,
                                  SDPatternOperator OpNodeRnd,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
  defm rrb:
AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 5512 (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr#_.Suffix, 5513 "$rc, $src2, $src1", "$src1, $src2, $rc", 5514 (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 timm:$rc)))>, 5515 EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>; 5516} 5517 5518multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr, 5519 SDPatternOperator OpNodeSAE, 5520 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 5521 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in 5522 defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 5523 (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix, 5524 "{sae}, $src2, $src1", "$src1, $src2, {sae}", 5525 (_.VT (OpNodeSAE _.RC:$src1, _.RC:$src2))>, 5526 EVEX_4V, EVEX_B, Sched<[sched]>; 5527} 5528 5529multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 5530 SDPatternOperator MaskOpNode, 5531 Predicate prd, X86SchedWriteSizes sched, 5532 bit IsCommutable = 0, 5533 bit IsPD128Commutable = IsCommutable> { 5534 let Predicates = [prd] in { 5535 defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f32_info, 5536 sched.PS.ZMM, IsCommutable>, EVEX_V512, PS, 5537 EVEX_CD8<32, CD8VF>; 5538 defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f64_info, 5539 sched.PD.ZMM, IsCommutable>, EVEX_V512, PD, VEX_W, 5540 EVEX_CD8<64, CD8VF>; 5541 } 5542 5543 // Define only if AVX512VL feature is present. 5544 let Predicates = [prd, HasVLX] in { 5545 defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f32x_info, 5546 sched.PS.XMM, IsCommutable>, EVEX_V128, PS, 5547 EVEX_CD8<32, CD8VF>; 5548 defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f32x_info, 5549 sched.PS.YMM, IsCommutable>, EVEX_V256, PS, 5550 EVEX_CD8<32, CD8VF>; 5551 defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v2f64x_info, 5552 sched.PD.XMM, IsPD128Commutable, 5553 IsCommutable>, EVEX_V128, PD, VEX_W, 5554 EVEX_CD8<64, CD8VF>; 5555 defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f64x_info, 5556 sched.PD.YMM, IsCommutable>, EVEX_V256, PD, VEX_W, 5557 EVEX_CD8<64, CD8VF>; 5558 } 5559} 5560 5561let Uses = [MXCSR] in 5562multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd, 5563 X86SchedWriteSizes sched> { 5564 defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM, 5565 v16f32_info>, 5566 EVEX_V512, PS, EVEX_CD8<32, CD8VF>; 5567 defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM, 5568 v8f64_info>, 5569 EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>; 5570} 5571 5572let Uses = [MXCSR] in 5573multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd, 5574 X86SchedWriteSizes sched> { 5575 defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM, 5576 v16f32_info>, 5577 EVEX_V512, PS, EVEX_CD8<32, CD8VF>; 5578 defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM, 5579 v8f64_info>, 5580 EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>; 5581} 5582 5583defm VADD : avx512_fp_binop_p<0x58, "vadd", any_fadd, fadd, HasAVX512, 5584 SchedWriteFAddSizes, 1>, 5585 avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>; 5586defm VMUL : avx512_fp_binop_p<0x59, "vmul", any_fmul, fmul, HasAVX512, 5587 SchedWriteFMulSizes, 1>, 5588 avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>; 5589defm VSUB : avx512_fp_binop_p<0x5C, "vsub", any_fsub, fsub, HasAVX512, 5590 SchedWriteFAddSizes>, 5591 avx512_fp_binop_p_round<0x5C, 
"vsub", X86fsubRnd, SchedWriteFAddSizes>; 5592defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", any_fdiv, fdiv, HasAVX512, 5593 SchedWriteFDivSizes>, 5594 avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>; 5595defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, X86fmin, HasAVX512, 5596 SchedWriteFCmpSizes, 0>, 5597 avx512_fp_binop_p_sae<0x5D, "vmin", X86fminSAE, SchedWriteFCmpSizes>; 5598defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, X86fmax, HasAVX512, 5599 SchedWriteFCmpSizes, 0>, 5600 avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxSAE, SchedWriteFCmpSizes>; 5601let isCodeGenOnly = 1 in { 5602 defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, X86fminc, HasAVX512, 5603 SchedWriteFCmpSizes, 1>; 5604 defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, X86fmaxc, HasAVX512, 5605 SchedWriteFCmpSizes, 1>; 5606} 5607let Uses = []<Register>, mayRaiseFPException = 0 in { 5608defm VAND : avx512_fp_binop_p<0x54, "vand", null_frag, null_frag, HasDQI, 5609 SchedWriteFLogicSizes, 1>; 5610defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, null_frag, HasDQI, 5611 SchedWriteFLogicSizes, 0>; 5612defm VOR : avx512_fp_binop_p<0x56, "vor", null_frag, null_frag, HasDQI, 5613 SchedWriteFLogicSizes, 1>; 5614defm VXOR : avx512_fp_binop_p<0x57, "vxor", null_frag, null_frag, HasDQI, 5615 SchedWriteFLogicSizes, 1>; 5616} 5617 5618multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode, 5619 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 5620 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in { 5621 defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 5622 (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix, 5623 "$src2, $src1", "$src1, $src2", 5624 (_.VT (OpNode _.RC:$src1, _.RC:$src2))>, 5625 EVEX_4V, Sched<[sched]>; 5626 defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 5627 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#_.Suffix, 5628 "$src2, $src1", "$src1, $src2", 5629 (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>, 5630 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>; 5631 defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 5632 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#_.Suffix, 5633 "${src2}"#_.BroadcastStr#", $src1", 5634 "$src1, ${src2}"#_.BroadcastStr, 5635 (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>, 5636 EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 5637 } 5638} 5639 5640multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode, 5641 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 5642 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in { 5643 defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 5644 (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix, 5645 "$src2, $src1", "$src1, $src2", 5646 (_.VT (OpNode _.RC:$src1, _.RC:$src2))>, 5647 Sched<[sched]>; 5648 defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), 5649 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr#_.Suffix, 5650 "$src2, $src1", "$src1, $src2", 5651 (OpNode _.RC:$src1, (_.ScalarIntMemFrags addr:$src2))>, 5652 Sched<[sched.Folded, sched.ReadAfterFold]>; 5653 } 5654} 5655 5656multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr, 5657 X86SchedWriteWidths sched> { 5658 defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v16f32_info>, 5659 avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v16f32_info>, 5660 EVEX_V512, EVEX_CD8<32, CD8VF>; 5661 defm PDZ : 
avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v8f64_info>, 5662 avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v8f64_info>, 5663 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; 5664 defm SSZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f32x_info>, 5665 avx512_fp_scalar_round<opcScaler, OpcodeStr#"ss", f32x_info, 5666 X86scalefsRnd, sched.Scl>, 5667 EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>; 5668 defm SDZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f64x_info>, 5669 avx512_fp_scalar_round<opcScaler, OpcodeStr#"sd", f64x_info, 5670 X86scalefsRnd, sched.Scl>, 5671 EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>, VEX_W; 5672 5673 // Define only if AVX512VL feature is present. 5674 let Predicates = [HasVLX] in { 5675 defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v4f32x_info>, 5676 EVEX_V128, EVEX_CD8<32, CD8VF>; 5677 defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v8f32x_info>, 5678 EVEX_V256, EVEX_CD8<32, CD8VF>; 5679 defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v2f64x_info>, 5680 EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>; 5681 defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v4f64x_info>, 5682 EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>; 5683 } 5684} 5685defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", 5686 SchedWriteFAdd>, T8PD, NotEVEX2VEXConvertible; 5687 5688//===----------------------------------------------------------------------===// 5689// AVX-512 VPTESTM instructions 5690//===----------------------------------------------------------------------===// 5691 5692multiclass avx512_vptest<bits<8> opc, string OpcodeStr, 5693 X86FoldableSchedWrite sched, X86VectorVTInfo _, 5694 string Name> { 5695 // NOTE: Patterns are omitted in favor of manual selection in X86ISelDAGToDAG. 5696 // There are just too many permutations due to commutability and bitcasts. 
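  // For illustration only (a semantics sketch, not a selection pattern):
  // vptestmd sets mask bit i when the per-element AND is nonzero, and
  // vptestnmd sets it when the AND is zero, e.g.
  //   vptestmd %zmm1, %zmm0, %k1   ; k1[i] = ((zmm0[i] & zmm1[i]) != 0)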
5697 let ExeDomain = _.ExeDomain, hasSideEffects = 0 in { 5698 defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst), 5699 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 5700 "$src2, $src1", "$src1, $src2", 5701 (null_frag), (null_frag), 1>, 5702 EVEX_4V, Sched<[sched]>; 5703 let mayLoad = 1 in 5704 defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst), 5705 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr, 5706 "$src2, $src1", "$src1, $src2", 5707 (null_frag), (null_frag)>, 5708 EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, 5709 Sched<[sched.Folded, sched.ReadAfterFold]>; 5710 } 5711} 5712 5713multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr, 5714 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 5715 let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in 5716 defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst), 5717 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr, 5718 "${src2}"#_.BroadcastStr#", $src1", 5719 "$src1, ${src2}"#_.BroadcastStr, 5720 (null_frag), (null_frag)>, 5721 EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, 5722 Sched<[sched.Folded, sched.ReadAfterFold]>; 5723} 5724 5725multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr, 5726 X86SchedWriteWidths sched, 5727 AVX512VLVectorVTInfo _> { 5728 let Predicates = [HasAVX512] in 5729 defm Z : avx512_vptest<opc, OpcodeStr, sched.ZMM, _.info512, NAME>, 5730 avx512_vptest_mb<opc, OpcodeStr, sched.ZMM, _.info512>, EVEX_V512; 5731 5732 let Predicates = [HasAVX512, HasVLX] in { 5733 defm Z256 : avx512_vptest<opc, OpcodeStr, sched.YMM, _.info256, NAME>, 5734 avx512_vptest_mb<opc, OpcodeStr, sched.YMM, _.info256>, EVEX_V256; 5735 defm Z128 : avx512_vptest<opc, OpcodeStr, sched.XMM, _.info128, NAME>, 5736 avx512_vptest_mb<opc, OpcodeStr, sched.XMM, _.info128>, EVEX_V128; 5737 } 5738} 5739 5740multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr, 5741 X86SchedWriteWidths sched> { 5742 defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", sched, 5743 avx512vl_i32_info>; 5744 defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", sched, 5745 avx512vl_i64_info>, VEX_W; 5746} 5747 5748multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr, 5749 X86SchedWriteWidths sched> { 5750 let Predicates = [HasBWI] in { 5751 defm WZ: avx512_vptest<opc, OpcodeStr#"w", sched.ZMM, 5752 v32i16_info, NAME#"W">, EVEX_V512, VEX_W; 5753 defm BZ: avx512_vptest<opc, OpcodeStr#"b", sched.ZMM, 5754 v64i8_info, NAME#"B">, EVEX_V512; 5755 } 5756 let Predicates = [HasVLX, HasBWI] in { 5757 5758 defm WZ256: avx512_vptest<opc, OpcodeStr#"w", sched.YMM, 5759 v16i16x_info, NAME#"W">, EVEX_V256, VEX_W; 5760 defm WZ128: avx512_vptest<opc, OpcodeStr#"w", sched.XMM, 5761 v8i16x_info, NAME#"W">, EVEX_V128, VEX_W; 5762 defm BZ256: avx512_vptest<opc, OpcodeStr#"b", sched.YMM, 5763 v32i8x_info, NAME#"B">, EVEX_V256; 5764 defm BZ128: avx512_vptest<opc, OpcodeStr#"b", sched.XMM, 5765 v16i8x_info, NAME#"B">, EVEX_V128; 5766 } 5767} 5768 5769multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr, 5770 X86SchedWriteWidths sched> : 5771 avx512_vptest_wb<opc_wb, OpcodeStr, sched>, 5772 avx512_vptest_dq<opc_dq, OpcodeStr, sched>; 5773 5774defm VPTESTM : avx512_vptest_all_forms<0x26, 0x27, "vptestm", 5775 SchedWriteVecLogic>, T8PD; 5776defm VPTESTNM : avx512_vptest_all_forms<0x26, 0x27, "vptestnm", 5777 SchedWriteVecLogic>, T8XS; 5778 5779//===----------------------------------------------------------------------===// 5780// AVX-512 Shift instructions 
5781//===----------------------------------------------------------------------===// 5782 5783multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM, 5784 string OpcodeStr, SDNode OpNode, 5785 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 5786 let ExeDomain = _.ExeDomain in { 5787 defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst), 5788 (ins _.RC:$src1, u8imm:$src2), OpcodeStr, 5789 "$src2, $src1", "$src1, $src2", 5790 (_.VT (OpNode _.RC:$src1, (i8 timm:$src2)))>, 5791 Sched<[sched]>; 5792 defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst), 5793 (ins _.MemOp:$src1, u8imm:$src2), OpcodeStr, 5794 "$src2, $src1", "$src1, $src2", 5795 (_.VT (OpNode (_.VT (_.LdFrag addr:$src1)), 5796 (i8 timm:$src2)))>, 5797 Sched<[sched.Folded]>; 5798 } 5799} 5800 5801multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM, 5802 string OpcodeStr, SDNode OpNode, 5803 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 5804 let ExeDomain = _.ExeDomain in 5805 defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst), 5806 (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr, 5807 "$src2, ${src1}"#_.BroadcastStr, "${src1}"#_.BroadcastStr#", $src2", 5808 (_.VT (OpNode (_.BroadcastLdFrag addr:$src1), (i8 timm:$src2)))>, 5809 EVEX_B, Sched<[sched.Folded]>; 5810} 5811 5812multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode, 5813 X86FoldableSchedWrite sched, ValueType SrcVT, 5814 X86VectorVTInfo _> { 5815 // src2 is always 128-bit 5816 let ExeDomain = _.ExeDomain in { 5817 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 5818 (ins _.RC:$src1, VR128X:$src2), OpcodeStr, 5819 "$src2, $src1", "$src1, $src2", 5820 (_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2)))>, 5821 AVX512BIBase, EVEX_4V, Sched<[sched]>; 5822 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 5823 (ins _.RC:$src1, i128mem:$src2), OpcodeStr, 5824 "$src2, $src1", "$src1, $src2", 5825 (_.VT (OpNode _.RC:$src1, (SrcVT (load addr:$src2))))>, 5826 AVX512BIBase, 5827 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>; 5828 } 5829} 5830 5831multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode, 5832 X86SchedWriteWidths sched, ValueType SrcVT, 5833 AVX512VLVectorVTInfo VTInfo, 5834 Predicate prd> { 5835 let Predicates = [prd] in 5836 defm Z : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.ZMM, SrcVT, 5837 VTInfo.info512>, EVEX_V512, 5838 EVEX_CD8<VTInfo.info512.EltSize, CD8VQ> ; 5839 let Predicates = [prd, HasVLX] in { 5840 defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.YMM, SrcVT, 5841 VTInfo.info256>, EVEX_V256, 5842 EVEX_CD8<VTInfo.info256.EltSize, CD8VH>; 5843 defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.XMM, SrcVT, 5844 VTInfo.info128>, EVEX_V128, 5845 EVEX_CD8<VTInfo.info128.EltSize, CD8VF>; 5846 } 5847} 5848 5849multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw, 5850 string OpcodeStr, SDNode OpNode, 5851 X86SchedWriteWidths sched, 5852 bit NotEVEX2VEXConvertibleQ = 0> { 5853 defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, sched, v4i32, 5854 avx512vl_i32_info, HasAVX512>; 5855 let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in 5856 defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, sched, v2i64, 5857 avx512vl_i64_info, HasAVX512>, VEX_W; 5858 defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, sched, v8i16, 5859 avx512vl_i16_info, HasBWI>; 5860} 5861 5862multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM, 5863 string OpcodeStr, SDNode OpNode, 
5864 X86SchedWriteWidths sched, 5865 AVX512VLVectorVTInfo VTInfo> { 5866 let Predicates = [HasAVX512] in 5867 defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, 5868 sched.ZMM, VTInfo.info512>, 5869 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.ZMM, 5870 VTInfo.info512>, EVEX_V512; 5871 let Predicates = [HasAVX512, HasVLX] in { 5872 defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, 5873 sched.YMM, VTInfo.info256>, 5874 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.YMM, 5875 VTInfo.info256>, EVEX_V256; 5876 defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, 5877 sched.XMM, VTInfo.info128>, 5878 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.XMM, 5879 VTInfo.info128>, EVEX_V128; 5880 } 5881} 5882 5883multiclass avx512_shift_rmi_w<bits<8> opcw, Format ImmFormR, Format ImmFormM, 5884 string OpcodeStr, SDNode OpNode, 5885 X86SchedWriteWidths sched> { 5886 let Predicates = [HasBWI] in 5887 defm WZ: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode, 5888 sched.ZMM, v32i16_info>, EVEX_V512, VEX_WIG; 5889 let Predicates = [HasVLX, HasBWI] in { 5890 defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode, 5891 sched.YMM, v16i16x_info>, EVEX_V256, VEX_WIG; 5892 defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode, 5893 sched.XMM, v8i16x_info>, EVEX_V128, VEX_WIG; 5894 } 5895} 5896 5897multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq, 5898 Format ImmFormR, Format ImmFormM, 5899 string OpcodeStr, SDNode OpNode, 5900 X86SchedWriteWidths sched, 5901 bit NotEVEX2VEXConvertibleQ = 0> { 5902 defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode, 5903 sched, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; 5904 let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in 5905 defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode, 5906 sched, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W; 5907} 5908 5909defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli, 5910 SchedWriteVecShiftImm>, 5911 avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli, 5912 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V; 5913 5914defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli, 5915 SchedWriteVecShiftImm>, 5916 avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli, 5917 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V; 5918 5919defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai, 5920 SchedWriteVecShiftImm, 1>, 5921 avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai, 5922 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V; 5923 5924defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri, 5925 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V; 5926defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli, 5927 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V; 5928 5929defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl, 5930 SchedWriteVecShift>; 5931defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra, 5932 SchedWriteVecShift, 1>; 5933defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl, 5934 SchedWriteVecShift>; 5935 5936// Use 512bit VPSRA/VPSRAI version to implement v2i64/v4i64 in case NoVLX. 
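// (Only the 512-bit form of the quadword arithmetic shift exists without VLX,
// so the patterns below widen the XMM/YMM value into a ZMM register, shift
// with VPSRAQZrr/VPSRAQZri, and extract the low subvector again; the
// INSERT_SUBREG of an IMPLICIT_DEF typically becomes a no-op copy.)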
5937let Predicates = [HasAVX512, NoVLX] in { 5938 def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))), 5939 (EXTRACT_SUBREG (v8i64 5940 (VPSRAQZrr 5941 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 5942 VR128X:$src2)), sub_ymm)>; 5943 5944 def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))), 5945 (EXTRACT_SUBREG (v8i64 5946 (VPSRAQZrr 5947 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 5948 VR128X:$src2)), sub_xmm)>; 5949 5950 def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 timm:$src2))), 5951 (EXTRACT_SUBREG (v8i64 5952 (VPSRAQZri 5953 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 5954 timm:$src2)), sub_ymm)>; 5955 5956 def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 timm:$src2))), 5957 (EXTRACT_SUBREG (v8i64 5958 (VPSRAQZri 5959 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 5960 timm:$src2)), sub_xmm)>; 5961} 5962 5963//===-------------------------------------------------------------------===// 5964// Variable Bit Shifts 5965//===-------------------------------------------------------------------===// 5966 5967multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode, 5968 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 5969 let ExeDomain = _.ExeDomain in { 5970 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 5971 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 5972 "$src2, $src1", "$src1, $src2", 5973 (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2)))>, 5974 AVX5128IBase, EVEX_4V, Sched<[sched]>; 5975 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 5976 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr, 5977 "$src2, $src1", "$src1, $src2", 5978 (_.VT (OpNode _.RC:$src1, 5979 (_.VT (_.LdFrag addr:$src2))))>, 5980 AVX5128IBase, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, 5981 Sched<[sched.Folded, sched.ReadAfterFold]>; 5982 } 5983} 5984 5985multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode, 5986 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 5987 let ExeDomain = _.ExeDomain in 5988 defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 5989 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr, 5990 "${src2}"#_.BroadcastStr#", $src1", 5991 "$src1, ${src2}"#_.BroadcastStr, 5992 (_.VT (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))>, 5993 AVX5128IBase, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, 5994 Sched<[sched.Folded, sched.ReadAfterFold]>; 5995} 5996 5997multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode, 5998 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> { 5999 let Predicates = [HasAVX512] in 6000 defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, 6001 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512; 6002 6003 let Predicates = [HasAVX512, HasVLX] in { 6004 defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, 6005 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256; 6006 defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, 6007 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128; 6008 } 6009} 6010 6011multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr, 6012 SDNode OpNode, X86SchedWriteWidths sched> { 6013 defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode, sched, 6014 avx512vl_i32_info>; 6015 defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode, sched, 6016 avx512vl_i64_info>, VEX_W; 6017} 6018 6019// Use 
512bit version to implement 128/256 bit in case NoVLX. 6020multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr, 6021 SDNode OpNode, list<Predicate> p> { 6022 let Predicates = p in { 6023 def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1), 6024 (_.info256.VT _.info256.RC:$src2))), 6025 (EXTRACT_SUBREG 6026 (!cast<Instruction>(OpcodeStr#"Zrr") 6027 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm), 6028 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)), 6029 sub_ymm)>; 6030 6031 def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1), 6032 (_.info128.VT _.info128.RC:$src2))), 6033 (EXTRACT_SUBREG 6034 (!cast<Instruction>(OpcodeStr#"Zrr") 6035 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm), 6036 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)), 6037 sub_xmm)>; 6038 } 6039} 6040multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr, 6041 SDNode OpNode, X86SchedWriteWidths sched> { 6042 let Predicates = [HasBWI] in 6043 defm WZ: avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v32i16_info>, 6044 EVEX_V512, VEX_W; 6045 let Predicates = [HasVLX, HasBWI] in { 6046 6047 defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v16i16x_info>, 6048 EVEX_V256, VEX_W; 6049 defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v8i16x_info>, 6050 EVEX_V128, VEX_W; 6051 } 6052} 6053 6054defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", X86vshlv, SchedWriteVarVecShift>, 6055 avx512_var_shift_w<0x12, "vpsllvw", X86vshlv, SchedWriteVarVecShift>; 6056 6057defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", X86vsrav, SchedWriteVarVecShift>, 6058 avx512_var_shift_w<0x11, "vpsravw", X86vsrav, SchedWriteVarVecShift>; 6059 6060defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", X86vsrlv, SchedWriteVarVecShift>, 6061 avx512_var_shift_w<0x10, "vpsrlvw", X86vsrlv, SchedWriteVarVecShift>; 6062 6063defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SchedWriteVarVecShift>; 6064defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SchedWriteVarVecShift>; 6065 6066defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", X86vsrav, [HasAVX512, NoVLX]>; 6067defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", X86vshlv, [HasBWI, NoVLX]>; 6068defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", X86vsrav, [HasBWI, NoVLX]>; 6069defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", X86vsrlv, [HasBWI, NoVLX]>; 6070 6071 6072// Use 512bit VPROL/VPROLI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX. 
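// (As a reminder of the semantics being matched: for element width w,
// rotl(x, n) == (x << n) | (x >> (w - n)) modulo w, which VPROLV/VPROL
// provide per element without a shift/or expansion.)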
6073let Predicates = [HasAVX512, NoVLX] in { 6074 def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))), 6075 (EXTRACT_SUBREG (v8i64 6076 (VPROLVQZrr 6077 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6078 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))), 6079 sub_xmm)>; 6080 def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))), 6081 (EXTRACT_SUBREG (v8i64 6082 (VPROLVQZrr 6083 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6084 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))), 6085 sub_ymm)>; 6086 6087 def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))), 6088 (EXTRACT_SUBREG (v16i32 6089 (VPROLVDZrr 6090 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6091 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))), 6092 sub_xmm)>; 6093 def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))), 6094 (EXTRACT_SUBREG (v16i32 6095 (VPROLVDZrr 6096 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6097 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))), 6098 sub_ymm)>; 6099 6100 def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 timm:$src2))), 6101 (EXTRACT_SUBREG (v8i64 6102 (VPROLQZri 6103 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6104 timm:$src2)), sub_xmm)>; 6105 def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 timm:$src2))), 6106 (EXTRACT_SUBREG (v8i64 6107 (VPROLQZri 6108 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6109 timm:$src2)), sub_ymm)>; 6110 6111 def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 timm:$src2))), 6112 (EXTRACT_SUBREG (v16i32 6113 (VPROLDZri 6114 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6115 timm:$src2)), sub_xmm)>; 6116 def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 timm:$src2))), 6117 (EXTRACT_SUBREG (v16i32 6118 (VPROLDZri 6119 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6120 timm:$src2)), sub_ymm)>; 6121} 6122 6123// Use 512bit VPROR/VPRORI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX. 
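// (rotr by n is equivalent to rotl by (w - n) modulo the element width w,
// so these patterns mirror the VPROL ones above using VPRORV/VPROR.)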
6124let Predicates = [HasAVX512, NoVLX] in { 6125 def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))), 6126 (EXTRACT_SUBREG (v8i64 6127 (VPRORVQZrr 6128 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6129 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))), 6130 sub_xmm)>; 6131 def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))), 6132 (EXTRACT_SUBREG (v8i64 6133 (VPRORVQZrr 6134 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6135 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))), 6136 sub_ymm)>; 6137 6138 def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))), 6139 (EXTRACT_SUBREG (v16i32 6140 (VPRORVDZrr 6141 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6142 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))), 6143 sub_xmm)>; 6144 def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))), 6145 (EXTRACT_SUBREG (v16i32 6146 (VPRORVDZrr 6147 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6148 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))), 6149 sub_ymm)>; 6150 6151 def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 timm:$src2))), 6152 (EXTRACT_SUBREG (v8i64 6153 (VPRORQZri 6154 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6155 timm:$src2)), sub_xmm)>; 6156 def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 timm:$src2))), 6157 (EXTRACT_SUBREG (v8i64 6158 (VPRORQZri 6159 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6160 timm:$src2)), sub_ymm)>; 6161 6162 def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 timm:$src2))), 6163 (EXTRACT_SUBREG (v16i32 6164 (VPRORDZri 6165 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6166 timm:$src2)), sub_xmm)>; 6167 def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 timm:$src2))), 6168 (EXTRACT_SUBREG (v16i32 6169 (VPRORDZri 6170 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6171 timm:$src2)), sub_ymm)>; 6172} 6173 6174//===-------------------------------------------------------------------===// 6175// 1-src variable permutation VPERMW/D/Q 6176//===-------------------------------------------------------------------===// 6177 6178multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode, 6179 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> { 6180 let Predicates = [HasAVX512] in 6181 defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>, 6182 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info512>, EVEX_V512; 6183 6184 let Predicates = [HasAVX512, HasVLX] in 6185 defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>, 6186 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info256>, EVEX_V256; 6187} 6188 6189multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM, 6190 string OpcodeStr, SDNode OpNode, 6191 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo> { 6192 let Predicates = [HasAVX512] in 6193 defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, 6194 sched, VTInfo.info512>, 6195 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, 6196 sched, VTInfo.info512>, EVEX_V512; 6197 let Predicates = [HasAVX512, HasVLX] in 6198 defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, 6199 sched, VTInfo.info256>, 6200 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, 6201 sched, VTInfo.info256>, EVEX_V256; 6202} 6203 6204multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr, 6205 
Predicate prd, SDNode OpNode, 6206 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> { 6207 let Predicates = [prd] in 6208 defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>, 6209 EVEX_V512 ; 6210 let Predicates = [HasVLX, prd] in { 6211 defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>, 6212 EVEX_V256 ; 6213 defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info128>, 6214 EVEX_V128 ; 6215 } 6216} 6217 6218defm VPERMW : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv, 6219 WriteVarShuffle256, avx512vl_i16_info>, VEX_W; 6220defm VPERMB : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv, 6221 WriteVarShuffle256, avx512vl_i8_info>; 6222 6223defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv, 6224 WriteVarShuffle256, avx512vl_i32_info>; 6225defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv, 6226 WriteVarShuffle256, avx512vl_i64_info>, VEX_W; 6227defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv, 6228 WriteFVarShuffle256, avx512vl_f32_info>; 6229defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv, 6230 WriteFVarShuffle256, avx512vl_f64_info>, VEX_W; 6231 6232defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq", 6233 X86VPermi, WriteShuffle256, avx512vl_i64_info>, 6234 EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W; 6235defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd", 6236 X86VPermi, WriteFShuffle256, avx512vl_f64_info>, 6237 EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W; 6238 6239//===----------------------------------------------------------------------===// 6240// AVX-512 - VPERMIL 6241//===----------------------------------------------------------------------===// 6242 6243multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode, 6244 X86FoldableSchedWrite sched, X86VectorVTInfo _, 6245 X86VectorVTInfo Ctrl> { 6246 defm rr: AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst), 6247 (ins _.RC:$src1, Ctrl.RC:$src2), OpcodeStr, 6248 "$src2, $src1", "$src1, $src2", 6249 (_.VT (OpNode _.RC:$src1, 6250 (Ctrl.VT Ctrl.RC:$src2)))>, 6251 T8PD, EVEX_4V, Sched<[sched]>; 6252 defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst), 6253 (ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr, 6254 "$src2, $src1", "$src1, $src2", 6255 (_.VT (OpNode 6256 _.RC:$src1, 6257 (Ctrl.VT (Ctrl.LdFrag addr:$src2))))>, 6258 T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, 6259 Sched<[sched.Folded, sched.ReadAfterFold]>; 6260 defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst), 6261 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr, 6262 "${src2}"#_.BroadcastStr#", $src1", 6263 "$src1, ${src2}"#_.BroadcastStr, 6264 (_.VT (OpNode 6265 _.RC:$src1, 6266 (Ctrl.VT (Ctrl.BroadcastLdFrag addr:$src2))))>, 6267 T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, 6268 Sched<[sched.Folded, sched.ReadAfterFold]>; 6269} 6270 6271multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar, 6272 X86SchedWriteWidths sched, 6273 AVX512VLVectorVTInfo _, 6274 AVX512VLVectorVTInfo Ctrl> { 6275 let Predicates = [HasAVX512] in { 6276 defm Z : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.ZMM, 6277 _.info512, Ctrl.info512>, EVEX_V512; 6278 } 6279 let Predicates = [HasAVX512, HasVLX] in { 6280 defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.XMM, 6281 _.info128, Ctrl.info128>, EVEX_V128; 6282 defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.YMM, 6283 _.info256, Ctrl.info256>, EVEX_V256; 6284 } 6285} 
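
// For reference (a sketch of the variable form's semantics, per the Intel
// SDM): vpermilps shuffles within each 128-bit lane, taking dst[i] =
// src[ctrl[i] & 3] for dword lanes, while vpermilpd uses bit 1 of each
// qword control element.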

multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
                         AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl> {
  defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, SchedWriteFVarShuffle,
                                      _, Ctrl>;
  defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
                                    X86VPermilpi, SchedWriteFShuffle, _>,
             EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
}

let ExeDomain = SSEPackedSingle in
defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info,
                               avx512vl_i32_info>;
let ExeDomain = SSEPackedDouble in
defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
                               avx512vl_i64_info>, VEX_W1X;

//===----------------------------------------------------------------------===//
// AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW
//===----------------------------------------------------------------------===//

defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
                                      X86PShufd, SchedWriteShuffle, avx512vl_i32_info>,
                                      EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;
defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
                                  X86PShufhw, SchedWriteShuffle>,
                                  EVEX, AVX512XSIi8Base;
defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
                                  X86PShuflw, SchedWriteShuffle>,
                                  EVEX, AVX512XDIi8Base;

//===----------------------------------------------------------------------===//
// AVX-512 - VPSHUFB
//===----------------------------------------------------------------------===//

multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86SchedWriteWidths sched> {
  let Predicates = [HasBWI] in
  defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v64i8_info>,
          EVEX_V512;

  let Predicates = [HasVLX, HasBWI] in {
    defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v32i8x_info>,
               EVEX_V256;
    defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v16i8x_info>,
               EVEX_V128;
  }
}

defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb,
                                  SchedWriteVarShuffle>, VEX_WIG;

//===----------------------------------------------------------------------===//
// Move Low to High and High to Low packed FP Instructions
//===----------------------------------------------------------------------===//

def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
                            (ins VR128X:$src1, VR128X:$src2),
                            "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                            [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))]>,
                            Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V;
let isCommutable = 1 in
def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
                            (ins VR128X:$src1, VR128X:$src2),
                            "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                            [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))]>,
                            Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V, NotMemoryFoldable;

//===----------------------------------------------------------------------===//
// VMOVHPS/PD VMOVLPS Instructions
// All patterns were taken from the SSE implementation.
6357//===----------------------------------------------------------------------===// 6358 6359multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr, 6360 SDPatternOperator OpNode, 6361 X86VectorVTInfo _> { 6362 let hasSideEffects = 0, mayLoad = 1, ExeDomain = _.ExeDomain in 6363 def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst), 6364 (ins _.RC:$src1, f64mem:$src2), 6365 !strconcat(OpcodeStr, 6366 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 6367 [(set _.RC:$dst, 6368 (OpNode _.RC:$src1, 6369 (_.VT (bitconvert 6370 (v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))]>, 6371 Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>, EVEX_4V; 6372} 6373 6374// No patterns for MOVLPS/MOVHPS as the Movlhps node should only be created in 6375// SSE1. And MOVLPS pattern is even more complex. 6376defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", null_frag, 6377 v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS; 6378defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl, 6379 v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W; 6380defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", null_frag, 6381 v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS; 6382defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movsd, 6383 v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W; 6384 6385let Predicates = [HasAVX512] in { 6386 // VMOVHPD patterns 6387 def : Pat<(v2f64 (X86Unpckl VR128X:$src1, (X86vzload64 addr:$src2))), 6388 (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>; 6389 6390 // VMOVLPD patterns 6391 def : Pat<(v2f64 (X86Movsd VR128X:$src1, (X86vzload64 addr:$src2))), 6392 (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>; 6393} 6394 6395let SchedRW = [WriteFStore] in { 6396let mayStore = 1, hasSideEffects = 0 in 6397def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs), 6398 (ins f64mem:$dst, VR128X:$src), 6399 "vmovhps\t{$src, $dst|$dst, $src}", 6400 []>, EVEX, EVEX_CD8<32, CD8VT2>; 6401def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs), 6402 (ins f64mem:$dst, VR128X:$src), 6403 "vmovhpd\t{$src, $dst|$dst, $src}", 6404 [(store (f64 (extractelt 6405 (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)), 6406 (iPTR 0))), addr:$dst)]>, 6407 EVEX, EVEX_CD8<64, CD8VT1>, VEX_W; 6408let mayStore = 1, hasSideEffects = 0 in 6409def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs), 6410 (ins f64mem:$dst, VR128X:$src), 6411 "vmovlps\t{$src, $dst|$dst, $src}", 6412 []>, EVEX, EVEX_CD8<32, CD8VT2>; 6413def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs), 6414 (ins f64mem:$dst, VR128X:$src), 6415 "vmovlpd\t{$src, $dst|$dst, $src}", 6416 [(store (f64 (extractelt (v2f64 VR128X:$src), 6417 (iPTR 0))), addr:$dst)]>, 6418 EVEX, EVEX_CD8<64, CD8VT1>, VEX_W; 6419} // SchedRW 6420 6421let Predicates = [HasAVX512] in { 6422 // VMOVHPD patterns 6423 def : Pat<(store (f64 (extractelt 6424 (v2f64 (X86VPermilpi VR128X:$src, (i8 1))), 6425 (iPTR 0))), addr:$dst), 6426 (VMOVHPDZ128mr addr:$dst, VR128X:$src)>; 6427} 6428//===----------------------------------------------------------------------===// 6429// FMA - Fused Multiply Operations 6430// 6431 6432multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, 6433 SDNode MaskOpNode, X86FoldableSchedWrite sched, 6434 X86VectorVTInfo _, string Suff> { 6435 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0, 6436 Uses = [MXCSR], mayRaiseFPException = 1 in { 6437 defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst), 6438 (ins _.RC:$src2, _.RC:$src3), 6439 OpcodeStr, "$src3, $src2", "$src2, $src3", 6440 
(_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 6441 (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>, 6442 AVX512FMA3Base, Sched<[sched]>; 6443 6444 defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst), 6445 (ins _.RC:$src2, _.MemOp:$src3), 6446 OpcodeStr, "$src3, $src2", "$src2, $src3", 6447 (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 6448 (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>, 6449 AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>; 6450 6451 defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst), 6452 (ins _.RC:$src2, _.ScalarMemOp:$src3), 6453 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), 6454 !strconcat("$src2, ${src3}", _.BroadcastStr ), 6455 (OpNode _.RC:$src2, 6456 _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))), 6457 (MaskOpNode _.RC:$src2, 6458 _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))), 1, 0>, 6459 AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 6460 } 6461} 6462 6463multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode, 6464 X86FoldableSchedWrite sched, 6465 X86VectorVTInfo _, string Suff> { 6466 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0, 6467 Uses = [MXCSR] in 6468 defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst), 6469 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc), 6470 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", 6471 (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))), 6472 (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))), 1, 1>, 6473 AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>; 6474} 6475 6476multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDNode OpNode, 6477 SDNode MaskOpNode, SDNode OpNodeRnd, 6478 X86SchedWriteWidths sched, 6479 AVX512VLVectorVTInfo _, string Suff> { 6480 let Predicates = [HasAVX512] in { 6481 defm Z : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode, 6482 sched.ZMM, _.info512, Suff>, 6483 avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM, 6484 _.info512, Suff>, 6485 EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>; 6486 } 6487 let Predicates = [HasVLX, HasAVX512] in { 6488 defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode, 6489 sched.YMM, _.info256, Suff>, 6490 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>; 6491 defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode, 6492 sched.XMM, _.info128, Suff>, 6493 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>; 6494 } 6495} 6496 6497multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDNode OpNode, 6498 SDNode MaskOpNode, SDNode OpNodeRnd> { 6499 defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode, 6500 OpNodeRnd, SchedWriteFMA, 6501 avx512vl_f32_info, "PS">; 6502 defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode, 6503 OpNodeRnd, SchedWriteFMA, 6504 avx512vl_f64_info, "PD">, VEX_W; 6505} 6506 6507defm VFMADD213 : avx512_fma3p_213_f<0xA8, "vfmadd213", any_fma, 6508 fma, X86FmaddRnd>; 6509defm VFMSUB213 : avx512_fma3p_213_f<0xAA, "vfmsub213", X86any_Fmsub, 6510 X86Fmsub, X86FmsubRnd>; 6511defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub, 6512 X86Fmaddsub, X86FmaddsubRnd>; 6513defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd, 6514 X86Fmsubadd, X86FmsubaddRnd>; 6515defm VFNMADD213 : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86any_Fnmadd, 6516 X86Fnmadd, X86FnmaddRnd>; 6517defm VFNMSUB213 : 
avx512_fma3p_213_f<0xAE, "vfnmsub213", X86any_Fnmsub, 6518 X86Fnmsub, X86FnmsubRnd>; 6519 6520 6521multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, 6522 SDNode MaskOpNode, X86FoldableSchedWrite sched, 6523 X86VectorVTInfo _, string Suff> { 6524 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0, 6525 Uses = [MXCSR], mayRaiseFPException = 1 in { 6526 defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst), 6527 (ins _.RC:$src2, _.RC:$src3), 6528 OpcodeStr, "$src3, $src2", "$src2, $src3", 6529 (null_frag), 6530 (_.VT (MaskOpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>, 6531 AVX512FMA3Base, Sched<[sched]>; 6532 6533 defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst), 6534 (ins _.RC:$src2, _.MemOp:$src3), 6535 OpcodeStr, "$src3, $src2", "$src2, $src3", 6536 (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 6537 (_.VT (MaskOpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>, 6538 AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>; 6539 6540 defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst), 6541 (ins _.RC:$src2, _.ScalarMemOp:$src3), 6542 OpcodeStr, "${src3}"#_.BroadcastStr#", $src2", 6543 "$src2, ${src3}"#_.BroadcastStr, 6544 (_.VT (OpNode _.RC:$src2, 6545 (_.VT (_.BroadcastLdFrag addr:$src3)), 6546 _.RC:$src1)), 6547 (_.VT (MaskOpNode _.RC:$src2, 6548 (_.VT (_.BroadcastLdFrag addr:$src3)), 6549 _.RC:$src1)), 1, 0>, AVX512FMA3Base, EVEX_B, 6550 Sched<[sched.Folded, sched.ReadAfterFold]>; 6551 } 6552} 6553 6554multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode, 6555 X86FoldableSchedWrite sched, 6556 X86VectorVTInfo _, string Suff> { 6557 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0, 6558 Uses = [MXCSR] in 6559 defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst), 6560 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc), 6561 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", 6562 (null_frag), 6563 (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc))), 6564 1, 1>, AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>; 6565} 6566 6567multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDNode OpNode, 6568 SDNode MaskOpNode, SDNode OpNodeRnd, 6569 X86SchedWriteWidths sched, 6570 AVX512VLVectorVTInfo _, string Suff> { 6571 let Predicates = [HasAVX512] in { 6572 defm Z : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode, 6573 sched.ZMM, _.info512, Suff>, 6574 avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM, 6575 _.info512, Suff>, 6576 EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>; 6577 } 6578 let Predicates = [HasVLX, HasAVX512] in { 6579 defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode, 6580 sched.YMM, _.info256, Suff>, 6581 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>; 6582 defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode, 6583 sched.XMM, _.info128, Suff>, 6584 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>; 6585 } 6586} 6587 6588multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDNode OpNode, 6589 SDNode MaskOpNode, SDNode OpNodeRnd > { 6590 defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode, 6591 OpNodeRnd, SchedWriteFMA, 6592 avx512vl_f32_info, "PS">; 6593 defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode, 6594 OpNodeRnd, SchedWriteFMA, 6595 avx512vl_f64_info, "PD">, VEX_W; 6596} 6597 6598defm VFMADD231 : avx512_fma3p_231_f<0xB8, "vfmadd231", any_fma, 6599 fma, X86FmaddRnd>; 
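// In the 231 forms the destination serves as the accumulator: e.g.
// vfmadd231ps computes dst = src2 * src3 + dst, whereas the 213 forms
// above compute dst = src2 * dst + src3.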
defm VFMSUB231 : avx512_fma3p_231_f<0xBA, "vfmsub231", X86any_Fmsub,
                                    X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub,
                                       X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd,
                                       X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD231 : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86any_Fnmadd,
                                     X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB231 : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86any_Fnmsub,
                                     X86Fnmsub, X86FnmsubRnd>;

multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               SDNode MaskOpNode, X86FoldableSchedWrite sched,
                               X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
      Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (null_frag),
          (_.VT (MaskOpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1>,
          AVX512FMA3Base, Sched<[sched]>;

  // The pattern is in 312 order so that the load is in a different place
  // from the 213 and 231 patterns; this helps tablegen's duplicate pattern
  // detection.
  defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)),
          (_.VT (MaskOpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>,
          AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;

  // The pattern is in 312 order so that the load is in a different place
  // from the 213 and 231 patterns; this helps tablegen's duplicate pattern
  // detection.
multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               SDNode MaskOpNode, X86FoldableSchedWrite sched,
                               X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
      Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (null_frag),
          (_.VT (MaskOpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1>,
          AVX512FMA3Base, Sched<[sched]>;

  // Pattern is in 312 order so that the load is in a different place from the
  // 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
  defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)),
          (_.VT (MaskOpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>,
          AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;

  // Pattern is in 312 order so that the load is in a different place from the
  // 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
  defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.ScalarMemOp:$src3),
           OpcodeStr, "${src3}"#_.BroadcastStr#", $src2",
           "$src2, ${src3}"#_.BroadcastStr,
           (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
                         _.RC:$src1, _.RC:$src2)),
           (_.VT (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
                             _.RC:$src1, _.RC:$src2)), 1, 0>,
           AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched,
                                 X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
      Uses = [MXCSR] in
  defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
           (null_frag),
           (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 timm:$rc))),
           1, 1>, AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
}

multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   SDNode MaskOpNode, SDNode OpNodeRnd,
                                   X86SchedWriteWidths sched,
                                   AVX512VLVectorVTInfo _, string Suff> {
  let Predicates = [HasAVX512] in {
    defm Z    : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.ZMM, _.info512, Suff>,
                avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
                                      _.info512, Suff>,
                EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.YMM, _.info256, Suff>,
                EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.XMM, _.info128, Suff>,
                EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}

multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDNode MaskOpNode, SDNode OpNodeRnd > {
  defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
                                    OpNodeRnd, SchedWriteFMA,
                                    avx512vl_f32_info, "PS">;
  defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
                                    OpNodeRnd, SchedWriteFMA,
                                    avx512vl_f64_info, "PD">, VEX_W;
}

defm VFMADD132    : avx512_fma3p_132_f<0x98, "vfmadd132", any_fma,
                                       fma, X86FmaddRnd>;
defm VFMSUB132    : avx512_fma3p_132_f<0x9A, "vfmsub132", X86any_Fmsub,
                                       X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub,
                                       X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd,
                                       X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD132   : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86any_Fnmadd,
                                       X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB132   : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86any_Fnmsub,
                                       X86Fnmsub, X86FnmsubRnd>;
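// Illustrative expansion (a rough sketch, not an exhaustive list): VFMADD132
// with the "PS" suffix and the VLX Z256 instantiation above produces
// VFMADD132PSZ256 together with its masked variants via AVX512_maskable_fma,
// plus the folded-load "m" and embedded-broadcast "mb" forms.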
// Scalar FMA
multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                               dag RHS_r, dag RHS_m, dag RHS_b, bit MaskOnlyReg> {
let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
  defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3), OpcodeStr,
          "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
          AVX512FMA3Base, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC;

  let mayLoad = 1 in
  defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
          "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
          AVX512FMA3Base, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC;

  let Uses = [MXCSR] in
  defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (null_frag), 1, 1>,
          AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>;

  let isCodeGenOnly = 1, isCommutable = 1 in {
    def r : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
                        (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
                        !strconcat(OpcodeStr,
                                   "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                        !if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC;
    def m : AVX512FMA3S<opc, MRMSrcMem, (outs _.FRC:$dst),
                        (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
                        !strconcat(OpcodeStr,
                                   "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                        [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC;

    let Uses = [MXCSR] in
    def rb : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3, AVX512RC:$rc),
                         !strconcat(OpcodeStr,
                                    "\t{$rc, $src3, $src2, $dst|$dst, $src2, $src3, $rc}"),
                         !if(MaskOnlyReg, [], [RHS_b])>, EVEX_B, EVEX_RC,
                         Sched<[SchedWriteFMA.Scl]>;
  }// isCodeGenOnly = 1
}// Constraints = "$src1 = $dst"
}
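// The *_Int forms above operate on the full VR128X register and leave the
// upper elements intact, matching the intrinsic semantics; the isCodeGenOnly
// r/m/rb forms operate on the scalar FR32X/FR64X classes and carry the actual
// selection patterns, supplied as RHS_r/RHS_m/RHS_b by the callers below.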
multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
                            string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd,
                            X86VectorVTInfo _, string SUFF> {
  let ExeDomain = _.ExeDomain in {
  defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix, _,
                // Operands for intrinsic are in 123 order to preserve passthru
                // semantics.
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
                                                  _.FRC:$src3))),
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
                                                  (_.ScalarLdFrag addr:$src3)))),
                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src1,
                                                     _.FRC:$src3, (i32 timm:$rc)))), 0>;

  defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix, _,
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
                                                  _.FRC:$src1))),
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
                                                  (_.ScalarLdFrag addr:$src3), _.FRC:$src1))),
                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src3,
                                                     _.FRC:$src1, (i32 timm:$rc)))), 1>;

  // One pattern is in 312 order so that the load is in a different place from
  // the 213 and 231 patterns; this helps tablegen's duplicate pattern
  // detection.
  defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix, _,
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3,
                                                  _.FRC:$src2))),
                (set _.FRC:$dst, (_.EltVT (OpNode (_.ScalarLdFrag addr:$src3),
                                                  _.FRC:$src1, _.FRC:$src2))),
                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src1, _.FRC:$src3,
                                                     _.FRC:$src2, (i32 timm:$rc)))), 1>;
  }
}

multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
                        string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd> {
  let Predicates = [HasAVX512] in {
    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
                                 OpNodeRnd, f32x_info, "SS">,
                                 EVEX_CD8<32, CD8VT1>, VEX_LIG;
    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
                                 OpNodeRnd, f64x_info, "SD">,
                                 EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
  }
}

defm VFMADD  : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", any_fma, X86FmaddRnd>;
defm VFMSUB  : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86any_Fmsub, X86FmsubRnd>;
defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86any_Fnmadd, X86FnmaddRnd>;
defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86any_Fnmsub, X86FnmsubRnd>;

multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode MaskedOp,
                                      SDNode RndOp, string Prefix,
                                      string Suffix, SDNode Move,
                                      X86VectorVTInfo _, PatLeaf ZeroFP> {
  let Predicates = [HasAVX512] in {
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op _.FRC:$src2,
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                    _.FRC:$src3))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zr_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op _.FRC:$src2, _.FRC:$src3,
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zr_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op _.FRC:$src2,
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                    (_.ScalarLdFrag addr:$src3)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zm_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                    (_.ScalarLdFrag addr:$src3), _.FRC:$src2))))),
              (!cast<I>(Prefix#"132"#Suffix#"Zm_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zm_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (MaskedOp _.FRC:$src2,
                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                           _.FRC:$src3),
                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zr_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (MaskedOp _.FRC:$src2,
                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                           (_.ScalarLdFrag addr:$src3)),
                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zm_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                           (_.ScalarLdFrag addr:$src3), _.FRC:$src2),
                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"132"#Suffix#"Zm_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (MaskedOp _.FRC:$src2, _.FRC:$src3,
                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zr_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zm_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (MaskedOp _.FRC:$src2,
                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                           _.FRC:$src3),
                 (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zr_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (MaskedOp _.FRC:$src2, _.FRC:$src3,
                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
                 (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zr_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (MaskedOp _.FRC:$src2,
                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                           (_.ScalarLdFrag addr:$src3)),
                 (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zm_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                           _.FRC:$src2, (_.ScalarLdFrag addr:$src3)),
                 (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"132"#Suffix#"Zm_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
                 (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zm_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    // Patterns with rounding mode.
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (RndOp _.FRC:$src2,
                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                       _.FRC:$src3, (i32 timm:$rc)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (RndOp _.FRC:$src2, _.FRC:$src3,
                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                       (i32 timm:$rc)))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (RndOp _.FRC:$src2,
                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                        _.FRC:$src3, (i32 timm:$rc)),
                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (RndOp _.FRC:$src2, _.FRC:$src3,
                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                        (i32 timm:$rc)),
                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (RndOp _.FRC:$src2,
                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                        _.FRC:$src3, (i32 timm:$rc)),
                 (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (RndOp _.FRC:$src2, _.FRC:$src3,
                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                        (i32 timm:$rc)),
                 (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
  }
}
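// Illustrative match (a sketch, assuming Prefix="VFMADD", Suffix="SS",
// Move=X86Movss): the first pattern in the multiclass above turns
//   (X86Movss $dst, (scalar_to_vector (fma $src2, $dst[0], $src3)))
// into VFMADD213SSZr_Int, preserving the upper elements of $dst.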
defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD",
                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
                                  "SS", X86Movss, v4f32x_info, fp32imm0>;

defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD",
                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;

//===----------------------------------------------------------------------===//
// AVX-512 IFMA - Packed Multiply of Unsigned 52-bit Integers and Add the
// Low/High 52-bit Products
//===----------------------------------------------------------------------===//
let Constraints = "$src1 = $dst" in {
multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  // NOTE: The SDNode has the multiply operands first with the add last.
  // This enables commuted load patterns to be autogenerated by tablegen.
  let ExeDomain = _.ExeDomain in {
  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
          AVX512FMA3Base, Sched<[sched]>;

  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
          AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.ScalarMemOp:$src3),
           OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
           !strconcat("$src2, ${src3}", _.BroadcastStr),
           (OpNode _.RC:$src2,
                   (_.VT (_.BroadcastLdFrag addr:$src3)),
                   _.RC:$src1)>,
           AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
} // Constraints = "$src1 = $dst"

multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
  let Predicates = [HasIFMA] in {
    defm Z    : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
                EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, HasIFMA] in {
    defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
                EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
                EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}

defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l,
                                         SchedWriteVecIMul, avx512vl_i64_info>,
                                         VEX_W;
defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h,
                                         SchedWriteVecIMul, avx512vl_i64_info>,
                                         VEX_W;
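// Per 64-bit lane, vpmadd52luq forms the 104-bit product of the low 52 bits
// of src2 and src3 and adds the low 52 bits of that product to the
// accumulator in dst; vpmadd52huq adds the high 52 bits of the same product.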
//===----------------------------------------------------------------------===//
// AVX-512 Scalar convert from signed integer to float/double
//===----------------------------------------------------------------------===//

multiclass avx512_vcvtsi<bits<8> opc, SDPatternOperator OpNode, X86FoldableSchedWrite sched,
                         RegisterClass SrcRC, X86VectorVTInfo DstVT,
                         X86MemOperand x86memop, PatFrag ld_frag, string asm,
                         string mem, list<Register> _Uses = [MXCSR],
                         bit _mayRaiseFPException = 1> {
let ExeDomain = DstVT.ExeDomain, Uses = _Uses,
    mayRaiseFPException = _mayRaiseFPException in {
  let hasSideEffects = 0, isCodeGenOnly = 1 in {
    def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
                (ins DstVT.FRC:$src1, SrcRC:$src),
                !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
                EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
    let mayLoad = 1 in
    def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
                (ins DstVT.FRC:$src1, x86memop:$src),
                asm#"{"#mem#"}\t{$src, $src1, $dst|$dst, $src1, $src}", []>,
                EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  } // hasSideEffects = 0
  def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
                  (ins DstVT.RC:$src1, SrcRC:$src2),
                  !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set DstVT.RC:$dst,
                        (OpNode (DstVT.VT DstVT.RC:$src1), SrcRC:$src2))]>,
                  EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;

  def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
                  (ins DstVT.RC:$src1, x86memop:$src2),
                  asm#"{"#mem#"}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  [(set DstVT.RC:$dst,
                        (OpNode (DstVT.VT DstVT.RC:$src1),
                                (ld_frag addr:$src2)))]>,
                  EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
  def : InstAlias<"v"#asm#mem#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  (!cast<Instruction>(NAME#"rr_Int") DstVT.RC:$dst,
                   DstVT.RC:$src1, SrcRC:$src2), 0, "att">;
}

multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode,
                               X86FoldableSchedWrite sched, RegisterClass SrcRC,
                               X86VectorVTInfo DstVT, string asm,
                               string mem> {
  let ExeDomain = DstVT.ExeDomain, Uses = [MXCSR] in
  def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
                   (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
                   !strconcat(asm,
                              "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"),
                   [(set DstVT.RC:$dst,
                         (OpNode (DstVT.VT DstVT.RC:$src1),
                                 SrcRC:$src2,
                                 (i32 timm:$rc)))]>,
                   EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
  def : InstAlias<"v"#asm#mem#"\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}",
                  (!cast<Instruction>(NAME#"rrb_Int") DstVT.RC:$dst,
                   DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc), 0, "att">;
}

multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode, SDNode OpNodeRnd,
                                X86FoldableSchedWrite sched,
                                RegisterClass SrcRC, X86VectorVTInfo DstVT,
                                X86MemOperand x86memop, PatFrag ld_frag,
                                string asm, string mem> {
  defm NAME : avx512_vcvtsi_round<opc, OpNodeRnd, sched, SrcRC, DstVT, asm, mem>,
              avx512_vcvtsi<opc, OpNode, sched, SrcRC, DstVT, x86memop,
                            ld_frag, asm, mem>, VEX_LIG;
}

let Predicates = [HasAVX512] in {
defm VCVTSI2SSZ  : avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
                                        WriteCvtI2SS, GR32,
                                        v4f32x_info, i32mem, loadi32, "cvtsi2ss", "l">,
                                        XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
                                        WriteCvtI2SS, GR64,
                                        v4f32x_info, i64mem, loadi64, "cvtsi2ss", "q">,
                                        XS, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTSI2SDZ  : avx512_vcvtsi<0x2A, null_frag, WriteCvtI2SD, GR32,
                                 v2f64x_info, i32mem, loadi32, "cvtsi2sd", "l", [], 0>,
                                 XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
                                        WriteCvtI2SD, GR64,
                                        v2f64x_info, i64mem, loadi64, "cvtsi2sd", "q">,
                                        XD, VEX_W, EVEX_CD8<64, CD8VT1>;
loadi64, "cvtsi2sd", "q">, 7159 XD, VEX_W, EVEX_CD8<64, CD8VT1>; 7160 7161def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}", 7162 (VCVTSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">; 7163def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}", 7164 (VCVTSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">; 7165 7166def : Pat<(f32 (any_sint_to_fp (loadi32 addr:$src))), 7167 (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>; 7168def : Pat<(f32 (any_sint_to_fp (loadi64 addr:$src))), 7169 (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>; 7170def : Pat<(f64 (any_sint_to_fp (loadi32 addr:$src))), 7171 (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>; 7172def : Pat<(f64 (any_sint_to_fp (loadi64 addr:$src))), 7173 (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>; 7174 7175def : Pat<(f32 (any_sint_to_fp GR32:$src)), 7176 (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>; 7177def : Pat<(f32 (any_sint_to_fp GR64:$src)), 7178 (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>; 7179def : Pat<(f64 (any_sint_to_fp GR32:$src)), 7180 (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>; 7181def : Pat<(f64 (any_sint_to_fp GR64:$src)), 7182 (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>; 7183 7184defm VCVTUSI2SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd, 7185 WriteCvtI2SS, GR32, 7186 v4f32x_info, i32mem, loadi32, 7187 "cvtusi2ss", "l">, XS, EVEX_CD8<32, CD8VT1>; 7188defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd, 7189 WriteCvtI2SS, GR64, 7190 v4f32x_info, i64mem, loadi64, "cvtusi2ss", "q">, 7191 XS, VEX_W, EVEX_CD8<64, CD8VT1>; 7192defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, null_frag, WriteCvtI2SD, GR32, v2f64x_info, 7193 i32mem, loadi32, "cvtusi2sd", "l", [], 0>, 7194 XD, VEX_LIG, EVEX_CD8<32, CD8VT1>; 7195defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd, 7196 WriteCvtI2SD, GR64, 7197 v2f64x_info, i64mem, loadi64, "cvtusi2sd", "q">, 7198 XD, VEX_W, EVEX_CD8<64, CD8VT1>; 7199 7200def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}", 7201 (VCVTUSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">; 7202def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}", 7203 (VCVTUSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">; 7204 7205def : Pat<(f32 (any_uint_to_fp (loadi32 addr:$src))), 7206 (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>; 7207def : Pat<(f32 (any_uint_to_fp (loadi64 addr:$src))), 7208 (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>; 7209def : Pat<(f64 (any_uint_to_fp (loadi32 addr:$src))), 7210 (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>; 7211def : Pat<(f64 (any_uint_to_fp (loadi64 addr:$src))), 7212 (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>; 7213 7214def : Pat<(f32 (any_uint_to_fp GR32:$src)), 7215 (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>; 7216def : Pat<(f32 (any_uint_to_fp GR64:$src)), 7217 (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>; 7218def : Pat<(f64 (any_uint_to_fp GR32:$src)), 7219 (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>; 7220def : Pat<(f64 (any_uint_to_fp GR64:$src)), 7221 (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>; 7222} 7223 7224//===----------------------------------------------------------------------===// 7225// AVX-512 Scalar convert from float/double to integer 7226//===----------------------------------------------------------------------===// 7227 7228multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT, 7229 X86VectorVTInfo DstVT, SDNode OpNode, 7230 
//===----------------------------------------------------------------------===//
// AVX-512 Scalar convert from float/double to integer
//===----------------------------------------------------------------------===//

multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
                                  X86VectorVTInfo DstVT, SDNode OpNode,
                                  SDNode OpNodeRnd,
                                  X86FoldableSchedWrite sched, string asm,
                                  string aliasStr> {
  let Predicates = [HasAVX512], ExeDomain = SrcVT.ExeDomain in {
    def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
                    !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                    [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src)))]>,
                    EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
    let Uses = [MXCSR] in
    def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc),
                     !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
                     [(set DstVT.RC:$dst, (OpNodeRnd (SrcVT.VT SrcVT.RC:$src),(i32 timm:$rc)))]>,
                     EVEX, VEX_LIG, EVEX_B, EVEX_RC,
                     Sched<[sched]>;
    def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src),
                    !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                    [(set DstVT.RC:$dst, (OpNode
                          (SrcVT.ScalarIntMemFrags addr:$src)))]>,
                    EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  } // Predicates = [HasAVX512]

  def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
          (!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0, "att">;
  def : InstAlias<"v" # asm # aliasStr # "\t{$rc, $src, $dst|$dst, $src, $rc}",
          (!cast<Instruction>(NAME # "rrb_Int") DstVT.RC:$dst, SrcVT.RC:$src, AVX512RC:$rc), 0, "att">;
  def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
          (!cast<Instruction>(NAME # "rm_Int") DstVT.RC:$dst,
           SrcVT.IntScalarMemOp:$src), 0, "att">;
}

// Convert float/double to signed/unsigned int 32/64
defm VCVTSS2SIZ:    avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info,X86cvts2si,
                                   X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{l}">,
                                   XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2SI64Z:  avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info, X86cvts2si,
                                   X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{q}">,
                                   XS, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2USIZ:   avx512_cvt_s_int_round<0x79, f32x_info, i32x_info, X86cvts2usi,
                                   X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{l}">,
                                   XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, f32x_info, i64x_info, X86cvts2usi,
                                   X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{q}">,
                                   XS, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTSD2SIZ:    avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info, X86cvts2si,
                                   X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{l}">,
                                   XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2SI64Z:  avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info, X86cvts2si,
                                   X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{q}">,
                                   XD, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2USIZ:   avx512_cvt_s_int_round<0x79, f64x_info, i32x_info, X86cvts2usi,
                                   X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{l}">,
                                   XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info, X86cvts2usi,
                                   X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{q}">,
                                   XD, VEX_W, EVEX_CD8<64, CD8VT1>;
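// The rrb_Int forms encode a static rounding mode through EVEX.RC (EVEX.b set
// on a register source), so a conversion such as round-toward-zero cvtss2si
// can be emitted without changing MXCSR; the {l}/{q} alias suffixes keep the
// 32- and 64-bit destination forms apart in AT&T syntax.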
multiclass avx512_cvt_s<bits<8> opc, string asm, X86VectorVTInfo SrcVT,
                        X86VectorVTInfo DstVT, SDNode OpNode,
                        X86FoldableSchedWrite sched,
                        string aliasStr> {
  let Predicates = [HasAVX512], ExeDomain = SrcVT.ExeDomain in {
    let isCodeGenOnly = 1 in {
    def rr : AVX512<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.FRC:$src),
                    !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                    [(set DstVT.RC:$dst, (OpNode SrcVT.FRC:$src))]>,
                    EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
    def rm : AVX512<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.ScalarMemOp:$src),
                    !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                    [(set DstVT.RC:$dst, (OpNode (SrcVT.ScalarLdFrag addr:$src)))]>,
                    EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
    }
  } // Predicates = [HasAVX512]
}

defm VCVTSS2SIZ:   avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i32x_info,
                                lrint, WriteCvtSS2I,
                                "{l}">, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2SI64Z: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i64x_info,
                                llrint, WriteCvtSS2I,
                                "{q}">, VEX_W, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSD2SIZ:   avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i32x_info,
                                lrint, WriteCvtSD2I,
                                "{l}">, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2SI64Z: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i64x_info,
                                llrint, WriteCvtSD2I,
                                "{q}">, VEX_W, XD, EVEX_CD8<64, CD8VT1>;

let Predicates = [HasAVX512] in {
  def : Pat<(i64 (lrint FR32:$src)), (VCVTSS2SI64Zrr FR32:$src)>;
  def : Pat<(i64 (lrint (loadf32 addr:$src))), (VCVTSS2SI64Zrm addr:$src)>;

  def : Pat<(i64 (lrint FR64:$src)), (VCVTSD2SI64Zrr FR64:$src)>;
  def : Pat<(i64 (lrint (loadf64 addr:$src))), (VCVTSD2SI64Zrm addr:$src)>;
}

// Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang,
// which would otherwise produce unnecessary vmovs{s,d} instructions.
let Predicates = [HasAVX512] in {
def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR64:$src)))))),
          (VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi64 addr:$src))))))),
          (VCVTSI642SSZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR32:$src)))))),
          (VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi32 addr:$src))))))),
          (VCVTSI2SSZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR64:$src)))))),
          (VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi64 addr:$src))))))),
          (VCVTSI642SDZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR32:$src)))))),
          (VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi32 addr:$src))))))),
          (VCVTSI2SDZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR64:$src)))))),
          (VCVTUSI642SSZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi64 addr:$src))))))),
          (VCVTUSI642SSZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR32:$src)))))),
          (VCVTUSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi32 addr:$src))))))),
          (VCVTUSI2SSZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR64:$src)))))),
          (VCVTUSI642SDZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi64 addr:$src))))))),
          (VCVTUSI642SDZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR32:$src)))))),
          (VCVTUSI2SDZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi32 addr:$src))))))),
          (VCVTUSI2SDZrm_Int VR128X:$dst, addr:$src)>;
} // Predicates = [HasAVX512]

// Convert float/double to signed/unsigned int 32/64 with truncation
multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
                            X86VectorVTInfo _DstRC, SDNode OpNode,
                            SDNode OpNodeInt, SDNode OpNodeSAE,
                            X86FoldableSchedWrite sched, string aliasStr>{
let Predicates = [HasAVX512], ExeDomain = _SrcRC.ExeDomain in {
  let isCodeGenOnly = 1 in {
  def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
                  !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                  [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>,
                  EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
  def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
                  !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                  [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>,
                  EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  }

  def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
                      !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                      [(set _DstRC.RC:$dst, (OpNodeInt (_SrcRC.VT _SrcRC.RC:$src)))]>,
                      EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
  let Uses = [MXCSR] in
  def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
                       !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
                       [(set _DstRC.RC:$dst, (OpNodeSAE (_SrcRC.VT _SrcRC.RC:$src)))]>,
                       EVEX, VEX_LIG, EVEX_B, Sched<[sched]>;
  def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
                      (ins _SrcRC.IntScalarMemOp:$src),
                      !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                      [(set _DstRC.RC:$dst,
                        (OpNodeInt (_SrcRC.ScalarIntMemFrags addr:$src)))]>,
                      EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
} //HasAVX512

  def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
          (!cast<Instruction>(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
  def : InstAlias<asm # aliasStr # "\t{{sae}, $src, $dst|$dst, $src, {sae}}",
          (!cast<Instruction>(NAME # "rrb_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
  def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
          (!cast<Instruction>(NAME # "rm_Int") _DstRC.RC:$dst,
           _SrcRC.IntScalarMemOp:$src), 0, "att">;
}

defm VCVTTSS2SIZ:   avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
                        "{l}">, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
                        "{q}">, VEX_W, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSD2SIZ:   avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
                        "{l}">, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
                        "{q}">, VEX_W, XD, EVEX_CD8<64, CD8VT1>;

defm VCVTTSS2USIZ:   avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i32x_info,
                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
                        "{l}">, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2USI64Z: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i64x_info,
                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
                        "{q}">, XS, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTTSD2USIZ:   avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i32x_info,
                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
                        "{l}">, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i64x_info,
                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
                        "{q}">, XD, VEX_W, EVEX_CD8<64, CD8VT1>;

//===----------------------------------------------------------------------===//
// AVX-512 Convert from float to double and back
//===----------------------------------------------------------------------===//

let Uses = [MXCSR], mayRaiseFPException = 1 in
multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                X86VectorVTInfo _Src, SDNode OpNode,
                                X86FoldableSchedWrite sched> {
  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (_.VT (OpNode (_.VT _.RC:$src1),
                                       (_Src.VT _Src.RC:$src2)))>,
                         EVEX_4V, VEX_LIG, Sched<[sched]>;
  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (_.VT (OpNode (_.VT _.RC:$src1),
                                       (_Src.ScalarIntMemFrags addr:$src2)))>,
                         EVEX_4V, VEX_LIG,
                         Sched<[sched.Folded, sched.ReadAfterFold]>;

  let isCodeGenOnly = 1, hasSideEffects = 0 in {
    def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
               (ins _.FRC:$src1, _Src.FRC:$src2),
               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
               EVEX_4V, VEX_LIG, Sched<[sched]>;
    let mayLoad = 1 in
    def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
               (ins _.FRC:$src1, _Src.ScalarMemOp:$src2),
               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
               EVEX_4V, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Scalar Conversion with SAE - suppress all exceptions
multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                    X86VectorVTInfo _Src, SDNode OpNodeSAE,
                                    X86FoldableSchedWrite sched> {
  let Uses = [MXCSR] in
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
                        "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                        (_.VT (OpNodeSAE (_.VT _.RC:$src1),
                                         (_Src.VT _Src.RC:$src2)))>,
                        EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
}

// Scalar Conversion with rounding control (RC)
multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                   X86VectorVTInfo _Src, SDNode OpNodeRnd,
                                   X86FoldableSchedWrite sched> {
  let Uses = [MXCSR] in
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
                        "$rc, $src2, $src1", "$src1, $src2, $rc",
                        (_.VT (OpNodeRnd (_.VT _.RC:$src1),
                                         (_Src.VT _Src.RC:$src2), (i32 timm:$rc)))>,
                        EVEX_4V, VEX_LIG, Sched<[sched]>,
                        EVEX_B, EVEX_RC;
}
multiclass avx512_cvt_fp_scalar_sd2ss<bits<8> opc, string OpcodeStr,
                                      SDNode OpNode, SDNode OpNodeRnd,
                                      X86FoldableSchedWrite sched,
                                      X86VectorVTInfo _src, X86VectorVTInfo _dst> {
  let Predicates = [HasAVX512], ExeDomain = SSEPackedSingle in {
    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
             avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
                                     OpNodeRnd, sched>, VEX_W, EVEX_CD8<64, CD8VT1>, XD;
  }
}

multiclass avx512_cvt_fp_scalar_ss2sd<bits<8> opc, string OpcodeStr,
                                      SDNode OpNode, SDNode OpNodeSAE,
                                      X86FoldableSchedWrite sched,
                                      X86VectorVTInfo _src, X86VectorVTInfo _dst> {
  let Predicates = [HasAVX512], ExeDomain = SSEPackedSingle in {
    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
             avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeSAE, sched>,
             EVEX_CD8<32, CD8VT1>, XS;
  }
}
defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss", X86frounds,
                                            X86froundsRnd, WriteCvtSD2SS, f64x_info,
                                            f32x_info>;
defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd", X86fpexts,
                                            X86fpextsSAE, WriteCvtSS2SD, f32x_info,
                                            f64x_info>;

def : Pat<(f64 (any_fpextend FR32X:$src)),
          (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>,
          Requires<[HasAVX512]>;
def : Pat<(f64 (any_fpextend (loadf32 addr:$src))),
          (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
          Requires<[HasAVX512, OptForSize]>;

def : Pat<(f32 (any_fpround FR64X:$src)),
          (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
          Requires<[HasAVX512]>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector
                     (f32 (any_fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
          (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
          Requires<[HasAVX512]>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector
                     (f64 (any_fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
          (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
          Requires<[HasAVX512]>;

//===----------------------------------------------------------------------===//
// AVX-512 Vector convert from signed/unsigned integer to float/double
// and from float/double to signed/unsigned integer
//===----------------------------------------------------------------------===//
"$src", "$src", 7611 (_.VT (OpNode (_Src.VT _Src.RC:$src))), 7612 (vselect_mask MaskRC:$mask, 7613 (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))), 7614 _.RC:$src0), 7615 (vselect_mask MaskRC:$mask, 7616 (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))), 7617 _.ImmAllZerosV)>, 7618 EVEX, Sched<[sched]>; 7619 7620 defm rm : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst), 7621 (ins MemOp:$src), 7622 (ins _.RC:$src0, MaskRC:$mask, MemOp:$src), 7623 (ins MaskRC:$mask, MemOp:$src), 7624 OpcodeStr#Alias, "$src", "$src", 7625 LdDAG, 7626 (vselect_mask MaskRC:$mask, MaskLdDAG, _.RC:$src0), 7627 (vselect_mask MaskRC:$mask, MaskLdDAG, _.ImmAllZerosV)>, 7628 EVEX, Sched<[sched.Folded]>; 7629 7630 defm rmb : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst), 7631 (ins _Src.ScalarMemOp:$src), 7632 (ins _.RC:$src0, MaskRC:$mask, _Src.ScalarMemOp:$src), 7633 (ins MaskRC:$mask, _Src.ScalarMemOp:$src), 7634 OpcodeStr, 7635 "${src}"#Broadcast, "${src}"#Broadcast, 7636 (_.VT (OpNode (_Src.VT 7637 (_Src.BroadcastLdFrag addr:$src)) 7638 )), 7639 (vselect_mask MaskRC:$mask, 7640 (_.VT 7641 (MaskOpNode 7642 (_Src.VT 7643 (_Src.BroadcastLdFrag addr:$src)))), 7644 _.RC:$src0), 7645 (vselect_mask MaskRC:$mask, 7646 (_.VT 7647 (MaskOpNode 7648 (_Src.VT 7649 (_Src.BroadcastLdFrag addr:$src)))), 7650 _.ImmAllZerosV)>, 7651 EVEX, EVEX_B, Sched<[sched.Folded]>; 7652 } 7653} 7654// Conversion with SAE - suppress all exceptions 7655multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, 7656 X86VectorVTInfo _Src, SDNode OpNodeSAE, 7657 X86FoldableSchedWrite sched> { 7658 let Uses = [MXCSR] in 7659 defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 7660 (ins _Src.RC:$src), OpcodeStr, 7661 "{sae}, $src", "$src, {sae}", 7662 (_.VT (OpNodeSAE (_Src.VT _Src.RC:$src)))>, 7663 EVEX, EVEX_B, Sched<[sched]>; 7664} 7665 7666// Conversion with rounding control (RC) 7667multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, 7668 X86VectorVTInfo _Src, SDNode OpNodeRnd, 7669 X86FoldableSchedWrite sched> { 7670 let Uses = [MXCSR] in 7671 defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 7672 (ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr, 7673 "$rc, $src", "$src, $rc", 7674 (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 timm:$rc)))>, 7675 EVEX, EVEX_B, EVEX_RC, Sched<[sched]>; 7676} 7677 7678// Similar to avx512_vcvt_fp, but uses an extload for the memory form. 
// Similar to avx512_vcvt_fp, but uses an extload for the memory form.
multiclass avx512_vcvt_fpextend<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                X86VectorVTInfo _Src, SDNode OpNode,
                                SDNode MaskOpNode,
                                X86FoldableSchedWrite sched,
                                string Broadcast = _.BroadcastStr,
                                string Alias = "", X86MemOperand MemOp = _Src.MemOp,
                                RegisterClass MaskRC = _.KRCWM>
  : avx512_vcvt_fp<opc, OpcodeStr, _, _Src, OpNode, MaskOpNode, sched, Broadcast,
                   Alias, MemOp, MaskRC,
                   (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src)),
                   (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src))>;

// Extend Float to Double
multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, v8f64_info, v8f32x_info,
                                  any_fpextend, fpextend, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f32x_info,
                                X86vfpextSAE, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, v2f64x_info, v4f32x_info,
                                     X86any_vfpext, X86vfpext, sched.XMM, "{1to2}",
                                     "", f64mem>, EVEX_V128;
    defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v4f32x_info,
                                     any_fpextend, fpextend, sched.YMM>, EVEX_V256;
  }
}

// Truncate Double to Float
multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info,
                            X86any_vfpround, X86vfpround, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8f64_info,
                               X86vfproundRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2f64x_info,
                               null_frag, null_frag, sched.XMM, "{1to2}", "{x}",
                               f128mem, VK2WM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info,
                               X86any_vfpround, X86vfpround,
                               sched.YMM, "{1to4}", "{y}">, EVEX_V256;
  }

  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
                   VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
                   VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
                   VK2WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                   VK2WM:$mask, f64mem:$src), 0, "att">;
"$dst {${mask}} {z}, $src}", 7753 (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst, 7754 VK4WM:$mask, VR256X:$src), 0, "att">; 7755 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}", 7756 (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, f64mem:$src), 0, "att">; 7757 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|" 7758 "$dst {${mask}}, ${src}{1to4}}", 7759 (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst, 7760 VK4WM:$mask, f64mem:$src), 0, "att">; 7761 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|" 7762 "$dst {${mask}} {z}, ${src}{1to4}}", 7763 (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst, 7764 VK4WM:$mask, f64mem:$src), 0, "att">; 7765} 7766 7767defm VCVTPD2PS : avx512_cvtpd2ps<0x5A, "vcvtpd2ps", SchedWriteCvtPD2PS>, 7768 VEX_W, PD, EVEX_CD8<64, CD8VF>; 7769defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd", SchedWriteCvtPS2PD>, 7770 PS, EVEX_CD8<32, CD8VH>; 7771 7772let Predicates = [HasVLX] in { 7773 // Special patterns to allow use of X86vmfpround for masking. Instruction 7774 // patterns have been disabled with null_frag. 7775 def : Pat<(X86any_vfpround (v2f64 VR128X:$src)), 7776 (VCVTPD2PSZ128rr VR128X:$src)>; 7777 def : Pat<(X86vmfpround (v2f64 VR128X:$src), (v4f32 VR128X:$src0), 7778 VK2WM:$mask), 7779 (VCVTPD2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>; 7780 def : Pat<(X86vmfpround (v2f64 VR128X:$src), v4f32x_info.ImmAllZerosV, 7781 VK2WM:$mask), 7782 (VCVTPD2PSZ128rrkz VK2WM:$mask, VR128X:$src)>; 7783 7784 def : Pat<(X86any_vfpround (loadv2f64 addr:$src)), 7785 (VCVTPD2PSZ128rm addr:$src)>; 7786 def : Pat<(X86vmfpround (loadv2f64 addr:$src), (v4f32 VR128X:$src0), 7787 VK2WM:$mask), 7788 (VCVTPD2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 7789 def : Pat<(X86vmfpround (loadv2f64 addr:$src), v4f32x_info.ImmAllZerosV, 7790 VK2WM:$mask), 7791 (VCVTPD2PSZ128rmkz VK2WM:$mask, addr:$src)>; 7792 7793 def : Pat<(X86any_vfpround (v2f64 (X86VBroadcastld64 addr:$src))), 7794 (VCVTPD2PSZ128rmb addr:$src)>; 7795 def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)), 7796 (v4f32 VR128X:$src0), VK2WM:$mask), 7797 (VCVTPD2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>; 7798 def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)), 7799 v4f32x_info.ImmAllZerosV, VK2WM:$mask), 7800 (VCVTPD2PSZ128rmbkz VK2WM:$mask, addr:$src)>; 7801} 7802 7803// Convert Signed/Unsigned Doubleword to Double 7804let Uses = []<Register>, mayRaiseFPException = 0 in 7805multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode, 7806 SDNode MaskOpNode, SDNode OpNode128, 7807 SDNode MaskOpNode128, 7808 X86SchedWriteWidths sched> { 7809 // No rounding in this op 7810 let Predicates = [HasAVX512] in 7811 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode, 7812 MaskOpNode, sched.ZMM>, EVEX_V512; 7813 7814 let Predicates = [HasVLX] in { 7815 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info, 7816 OpNode128, MaskOpNode128, sched.XMM, "{1to2}", 7817 "", i64mem, VK2WM, 7818 (v2f64 (OpNode128 (bc_v4i32 7819 (v2i64 7820 (scalar_to_vector (loadi64 addr:$src)))))), 7821 (v2f64 (MaskOpNode128 (bc_v4i32 7822 (v2i64 7823 (scalar_to_vector (loadi64 addr:$src))))))>, 7824 EVEX_V128; 7825 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode, 7826 MaskOpNode, sched.YMM>, EVEX_V256; 7827 } 7828} 7829 7830// Convert Signed/Unsigned Doubleword to Float 7831multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode, 7832 SDNode MaskOpNode, SDNode OpNodeRnd, 7833 
// Convert Signed/Unsigned Doubleword to Float
multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode MaskOpNode, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode,
                               MaskOpNode, sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }
}

// Convert Float to Signed/Unsigned Doubleword with truncation
multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            SDNode MaskOpNode,
                            SDNode OpNodeSAE, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
                                OpNodeSAE, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
                               MaskOpNode, sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }
}

// Convert Float to Signed/Unsigned Doubleword
multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode MaskOpNode, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
                               MaskOpNode, sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }
}
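// Pairing note: the truncating cvtt* multiclasses compose with
// avx512_vcvt_fp_sae (truncation has no rounding mode to override, only
// {sae}), while the rounding cvt* multiclasses compose with
// avx512_vcvt_fp_rc to expose the explicit rounding-control encodings.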
// Convert Double to Signed/Unsigned Doubleword with truncation
multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            SDNode MaskOpNode, SDNode OpNodeSAE,
                            X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
                                OpNodeSAE, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    // We need "x"/"y" suffixes in order to distinguish between the 128 and 256
    // memory forms of these instructions in the Asm Parser. They have the same
    // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
    // for the same reason.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
                               null_frag, null_frag, sched.XMM, "{1to2}",
                               "{x}", f128mem, VK2WM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
                               MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256;
  }

  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
                   VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
                   VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
                   VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
                   f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
                   VK2WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                   VK2WM:$mask, f64mem:$src), 0, "att">;

  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
                   VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
                   VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
                   VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
                   f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
                   VK4WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
                   VK4WM:$mask, f64mem:$src), 0, "att">;
}
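// For example, the "x"/"y" aliases above let the assembler disambiguate the
// otherwise identical-looking AT&T register forms
//   vcvttpd2dqx %xmm1, %xmm0   // 128-bit source (Z128rr)
//   vcvttpd2dqy %ymm1, %xmm0   // 256-bit source (Z256rr)
// since both variants write a VR128X destination.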
// Convert Double to Signed/Unsigned Doubleword
multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode MaskOpNode, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    // We need "x"/"y" suffixes in order to distinguish between the 128 and 256
    // memory forms of these instructions in the Asm Parser. They have the same
    // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
    // for the same reason.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
                               null_frag, null_frag, sched.XMM, "{1to2}",
                               "{x}", f128mem, VK2WM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
                               MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256;
  }

  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
                   VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
                   VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
                   f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
                   VK2WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                   VK2WM:$mask, f64mem:$src), 0, "att">;

  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
                   VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
                   VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
                   f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
                   VK4WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
                   VK4WM:$mask, f64mem:$src), 0, "att">;
}

// Convert Double to Signed/Unsigned Quadword
multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode MaskOpNode, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
                               MaskOpNode, sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }
}
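// The packed quadword conversions below require AVX512DQ: the 512-bit forms
// are gated on HasDQI, and the 128/256-bit forms on HasDQI plus HasVLX.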
// Convert Double to Signed/Unsigned Quadword with truncation
multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            SDNode MaskOpNode, SDNode OpNodeRnd,
                            X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
                                OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
                               MaskOpNode, sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }
}

// Convert Signed/Unsigned Quadword to Double
multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode MaskOpNode, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode,
                               MaskOpNode, sched.XMM>, EVEX_V128,
                               NotEVEX2VEXConvertible;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256,
                               NotEVEX2VEXConvertible;
  }
}

// Convert Float to Signed/Unsigned Quadword
multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode MaskOpNode, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    // Explicitly specified broadcast string, since we take only 2 elements
    // from the v4f32x_info source
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
                               MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM,
                               (v2i64 (OpNode (bc_v4f32
                                (v2f64
                                 (scalar_to_vector (loadf64 addr:$src)))))),
                               (v2i64 (MaskOpNode (bc_v4f32
                                (v2f64
                                 (scalar_to_vector (loadf64 addr:$src))))))>,
                               EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }
}

// Convert Float to Signed/Unsigned Quadword with truncation
multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            SDNode MaskOpNode, SDNode OpNodeRnd,
                            X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
                                OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    // Explicitly specified broadcast string, since we take only 2 elements
    // from the v4f32x_info source
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
                               MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM,
                               (v2i64 (OpNode (bc_v4f32
                                (v2f64
                                 (scalar_to_vector (loadf64 addr:$src)))))),
                               (v2i64 (MaskOpNode (bc_v4f32
                                (v2f64
                                 (scalar_to_vector (loadf64 addr:$src))))))>,
                               EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }
}
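// In the 128-bit PS2QQ forms above only the low two f32 elements of the
// source are consumed, so the plain memory operand is f64mem and the
// broadcast form is "{1to2}", e.g. (AT&T syntax)
//   vcvtps2qq (%rax){1to2}, %xmm0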
// Convert Signed/Unsigned Quadword to Float
multiclass avx512_cvtqq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode MaskOpNode, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i64_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8i64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    // We need "x"/"y" suffixes in order to distinguish between the 128 and 256
    // memory forms of these instructions in the Asm Parser. They have the same
    // dest type - 'v4f32x_info'. We also specify the broadcast string explicitly
    // for the same reason.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2i64x_info, null_frag,
                               null_frag, sched.XMM, "{1to2}", "{x}", i128mem, VK2WM>,
                               EVEX_V128, NotEVEX2VEXConvertible;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i64x_info, OpNode,
                               MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256,
                               NotEVEX2VEXConvertible;
  }

  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
                   VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
                   VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
                   VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
                   i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
                   VK2WM:$mask, i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                   VK2WM:$mask, i64mem:$src), 0, "att">;

  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
                   VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
                  "$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
                   VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
                   VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
                   i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
                   VK4WM:$mask, i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
                   VK4WM:$mask, i64mem:$src), 0, "att">;
}
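// In the instantiations below, each conversion is given a strict-FP-aware
// node (the any_* / X86any_* form) for the unmasked patterns and a plain
// node for the masked patterns, plus a rounding or SAE node for the 512-bit
// embedded-rounding forms.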
"vcvtdq2ps", any_sint_to_fp, sint_to_fp, 8193 X86VSintToFpRnd, SchedWriteCvtDQ2PS>, 8194 PS, EVEX_CD8<32, CD8VF>; 8195 8196defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86any_cvttp2si, 8197 X86cvttp2si, X86cvttp2siSAE, 8198 SchedWriteCvtPS2DQ>, XS, EVEX_CD8<32, CD8VF>; 8199 8200defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86any_cvttp2si, 8201 X86cvttp2si, X86cvttp2siSAE, 8202 SchedWriteCvtPD2DQ>, 8203 PD, VEX_W, EVEX_CD8<64, CD8VF>; 8204 8205defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86any_cvttp2ui, 8206 X86cvttp2ui, X86cvttp2uiSAE, 8207 SchedWriteCvtPS2DQ>, PS, EVEX_CD8<32, CD8VF>; 8208 8209defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86any_cvttp2ui, 8210 X86cvttp2ui, X86cvttp2uiSAE, 8211 SchedWriteCvtPD2DQ>, 8212 PS, VEX_W, EVEX_CD8<64, CD8VF>; 8213 8214defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", any_uint_to_fp, 8215 uint_to_fp, X86any_VUintToFP, X86VUintToFP, 8216 SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>; 8217 8218defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", any_uint_to_fp, 8219 uint_to_fp, X86VUintToFpRnd, 8220 SchedWriteCvtDQ2PS>, XD, EVEX_CD8<32, CD8VF>; 8221 8222defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int, X86cvtp2Int, 8223 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD, 8224 EVEX_CD8<32, CD8VF>; 8225 8226defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int, X86cvtp2Int, 8227 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, XD, 8228 VEX_W, EVEX_CD8<64, CD8VF>; 8229 8230defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt, X86cvtp2UInt, 8231 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, 8232 PS, EVEX_CD8<32, CD8VF>; 8233 8234defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt, X86cvtp2UInt, 8235 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W, 8236 PS, EVEX_CD8<64, CD8VF>; 8237 8238defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int, X86cvtp2Int, 8239 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, VEX_W, 8240 PD, EVEX_CD8<64, CD8VF>; 8241 8242defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int, X86cvtp2Int, 8243 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD, 8244 EVEX_CD8<32, CD8VH>; 8245 8246defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt, X86cvtp2UInt, 8247 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W, 8248 PD, EVEX_CD8<64, CD8VF>; 8249 8250defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt, X86cvtp2UInt, 8251 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, PD, 8252 EVEX_CD8<32, CD8VH>; 8253 8254defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86any_cvttp2si, 8255 X86cvttp2si, X86cvttp2siSAE, 8256 SchedWriteCvtPD2DQ>, VEX_W, 8257 PD, EVEX_CD8<64, CD8VF>; 8258 8259defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86any_cvttp2si, 8260 X86cvttp2si, X86cvttp2siSAE, 8261 SchedWriteCvtPS2DQ>, PD, 8262 EVEX_CD8<32, CD8VH>; 8263 8264defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86any_cvttp2ui, 8265 X86cvttp2ui, X86cvttp2uiSAE, 8266 SchedWriteCvtPD2DQ>, VEX_W, 8267 PD, EVEX_CD8<64, CD8VF>; 8268 8269defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86any_cvttp2ui, 8270 X86cvttp2ui, X86cvttp2uiSAE, 8271 SchedWriteCvtPS2DQ>, PD, 8272 EVEX_CD8<32, CD8VH>; 8273 8274defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", any_sint_to_fp, 8275 sint_to_fp, X86VSintToFpRnd, 8276 SchedWriteCvtDQ2PD>, VEX_W, XS, EVEX_CD8<64, CD8VF>; 8277 8278defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", any_uint_to_fp, 8279 uint_to_fp, X86VUintToFpRnd, SchedWriteCvtDQ2PD>, 8280 VEX_W, XS, EVEX_CD8<64, CD8VF>; 8281 8282defm VCVTQQ2PS : 
defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", any_sint_to_fp,
                                 sint_to_fp, X86VSintToFpRnd, SchedWriteCvtDQ2PS>,
                                 VEX_W, PS, EVEX_CD8<64, CD8VF>;

defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", any_uint_to_fp,
                                  uint_to_fp, X86VUintToFpRnd, SchedWriteCvtDQ2PS>,
                                  VEX_W, XD, EVEX_CD8<64, CD8VF>;

let Predicates = [HasVLX] in {
  // Special patterns to allow use of X86mcvtp2Int for masking. Instruction
  // patterns have been disabled with null_frag.
  def : Pat<(v4i32 (X86cvtp2Int (v2f64 VR128X:$src))),
            (VCVTPD2DQZ128rr VR128X:$src)>;
  def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;

  def : Pat<(v4i32 (X86cvtp2Int (loadv2f64 addr:$src))),
            (VCVTPD2DQZ128rm addr:$src)>;
  def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v4i32 (X86cvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)))),
            (VCVTPD2DQZ128rmb addr:$src)>;
  def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
                          (v4i32 VR128X:$src0), VK2WM:$mask),
            (VCVTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;

  // Special patterns to allow use of X86mcvttp2si for masking. Instruction
  // patterns have been disabled with null_frag.
  def : Pat<(v4i32 (X86any_cvttp2si (v2f64 VR128X:$src))),
            (VCVTTPD2DQZ128rr VR128X:$src)>;
  def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;

  def : Pat<(v4i32 (X86any_cvttp2si (loadv2f64 addr:$src))),
            (VCVTTPD2DQZ128rm addr:$src)>;
  def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v4i32 (X86any_cvttp2si (v2f64 (X86VBroadcastld64 addr:$src)))),
            (VCVTTPD2DQZ128rmb addr:$src)>;
  def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
                          (v4i32 VR128X:$src0), VK2WM:$mask),
            (VCVTTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
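  // For instance, the masked truncating pattern above,
  //   (X86mcvttp2si (v2f64 VR128X:$src), (v4i32 VR128X:$src0), VK2WM:$mask),
  // selects VCVTTPD2DQZ128rrk with operand order (src0, mask, src), i.e. the
  // passthru value becomes the tied destination input of the masked form.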
  // Special patterns to allow use of X86mcvtp2UInt for masking. Instruction
  // patterns have been disabled with null_frag.
  def : Pat<(v4i32 (X86cvtp2UInt (v2f64 VR128X:$src))),
            (VCVTPD2UDQZ128rr VR128X:$src)>;
  def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;

  def : Pat<(v4i32 (X86cvtp2UInt (loadv2f64 addr:$src))),
            (VCVTPD2UDQZ128rm addr:$src)>;
  def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v4i32 (X86cvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)))),
            (VCVTPD2UDQZ128rmb addr:$src)>;
  def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
                           (v4i32 VR128X:$src0), VK2WM:$mask),
            (VCVTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
                           v4i32x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;

  // Special patterns to allow use of X86mcvttp2ui for masking. Instruction
  // patterns have been disabled with null_frag.
  def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 VR128X:$src))),
            (VCVTTPD2UDQZ128rr VR128X:$src)>;
  def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;

  def : Pat<(v4i32 (X86any_cvttp2ui (loadv2f64 addr:$src))),
            (VCVTTPD2UDQZ128rm addr:$src)>;
  def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)))),
            (VCVTTPD2UDQZ128rmb addr:$src)>;
  def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
                          (v4i32 VR128X:$src0), VK2WM:$mask),
            (VCVTTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
}

let Predicates = [HasDQI, HasVLX] in {
  def : Pat<(v2i64 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
            (VCVTPS2QQZ128rm addr:$src)>;
  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
                    (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                    VR128X:$src0)),
            (VCVTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
                    (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                    v2i64x_info.ImmAllZerosV)),
            (VCVTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v2i64 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
            (VCVTPS2UQQZ128rm addr:$src)>;
  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
                    (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                    VR128X:$src0)),
            (VCVTPS2UQQZ128rmk VR128X:$src0,
             VK2WM:$mask, addr:$src)>;
  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
                    (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                    v2i64x_info.ImmAllZerosV)),
            (VCVTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v2i64 (X86any_cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
            (VCVTTPS2QQZ128rm addr:$src)>;
  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
                    (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                    VR128X:$src0)),
            (VCVTTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
                    (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                    v2i64x_info.ImmAllZerosV)),
            (VCVTTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v2i64 (X86any_cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
            (VCVTTPS2UQQZ128rm addr:$src)>;
  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
                    (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                    VR128X:$src0)),
            (VCVTTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
                    (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                    v2i64x_info.ImmAllZerosV)),
            (VCVTTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
}

let Predicates = [HasVLX] in {
  def : Pat<(v2f64 (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
            (VCVTDQ2PDZ128rm addr:$src)>;
  def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
                    (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
                    VR128X:$src0)),
            (VCVTDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
                    (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
                    v2f64x_info.ImmAllZerosV)),
            (VCVTDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v2f64 (X86any_VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
            (VCVTUDQ2PDZ128rm addr:$src)>;
  def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
                    (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
                    VR128X:$src0)),
            (VCVTUDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
                    (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
                    v2f64x_info.ImmAllZerosV)),
            (VCVTUDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
}
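// The X86vzload64 patterns above fold a zero-extending 64-bit vector load
// (e.g. a movq-style load from memory) directly into the conversion instead
// of emitting a separate load-and-zero instruction.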
let Predicates = [HasDQI, HasVLX] in {
  // Special patterns to allow use of X86VMSintToFP for masking. Instruction
  // patterns have been disabled with null_frag.
  def : Pat<(v4f32 (X86any_VSintToFP (v2i64 VR128X:$src))),
            (VCVTQQ2PSZ128rr VR128X:$src)>;
  def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), (v4f32 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTQQ2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), v4f32x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTQQ2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;

  def : Pat<(v4f32 (X86any_VSintToFP (loadv2i64 addr:$src))),
            (VCVTQQ2PSZ128rm addr:$src)>;
  def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), (v4f32 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTQQ2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), v4f32x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTQQ2PSZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v4f32 (X86any_VSintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
            (VCVTQQ2PSZ128rmb addr:$src)>;
  def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
                           (v4f32 VR128X:$src0), VK2WM:$mask),
            (VCVTQQ2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
                           v4f32x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTQQ2PSZ128rmbkz VK2WM:$mask, addr:$src)>;

  // Special patterns to allow use of X86VMUintToFP for masking. Instruction
  // patterns have been disabled with null_frag.
  def : Pat<(v4f32 (X86any_VUintToFP (v2i64 VR128X:$src))),
            (VCVTUQQ2PSZ128rr VR128X:$src)>;
  def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), (v4f32 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTUQQ2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), v4f32x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTUQQ2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;

  def : Pat<(v4f32 (X86any_VUintToFP (loadv2i64 addr:$src))),
            (VCVTUQQ2PSZ128rm addr:$src)>;
  def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), (v4f32 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTUQQ2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), v4f32x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTUQQ2PSZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v4f32 (X86any_VUintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
            (VCVTUQQ2PSZ128rmb addr:$src)>;
  def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
                           (v4f32 VR128X:$src0), VK2WM:$mask),
            (VCVTUQQ2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
                           v4f32x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTUQQ2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
}

//===----------------------------------------------------------------------===//
// Half precision conversion instructions
//===----------------------------------------------------------------------===//

let Uses = [MXCSR], mayRaiseFPException = 1 in
multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                           X86MemOperand x86memop, dag ld_dag,
                           X86FoldableSchedWrite sched> {
  defm rr : AVX512_maskable_split<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
                                  (ins _src.RC:$src), "vcvtph2ps", "$src", "$src",
                                  (X86any_cvtph2ps (_src.VT _src.RC:$src)),
                                  (X86cvtph2ps (_src.VT _src.RC:$src))>,
                                  T8PD, Sched<[sched]>;
  defm rm : AVX512_maskable_split<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
                                  (ins x86memop:$src), "vcvtph2ps", "$src", "$src",
                                  (X86any_cvtph2ps
                                   (_src.VT ld_dag)),
                                  (X86cvtph2ps (_src.VT ld_dag))>,
                                  T8PD, Sched<[sched.Folded]>;
}

multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                               X86FoldableSchedWrite sched> {
  let Uses = [MXCSR] in
  defm rrb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
                             (ins _src.RC:$src), "vcvtph2ps",
                             "{sae}, $src", "$src, {sae}",
                             (X86cvtph2psSAE (_src.VT _src.RC:$src))>,
                             T8PD, EVEX_B, Sched<[sched]>;
}

let Predicates = [HasAVX512] in
  defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem,
                                    (load addr:$src), WriteCvtPH2PSZ>,
                    avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSZ>,
                    EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;

let Predicates = [HasVLX] in {
  defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
                                       (load addr:$src), WriteCvtPH2PSY>, EVEX, EVEX_V256,
                                       EVEX_CD8<32, CD8VH>;
  defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
                                       (bitconvert (v2i64 (X86vzload64 addr:$src))),
                                       WriteCvtPH2PS>, EVEX, EVEX_V128,
                                       EVEX_CD8<32, CD8VH>;

  // Pattern match vcvtph2ps of a scalar i64 load.
  def : Pat<(v4f32 (X86any_cvtph2ps (v8i16 (bitconvert
              (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
            (VCVTPH2PSZ128rm addr:$src)>;
}

multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                           X86MemOperand x86memop, SchedWrite RR, SchedWrite MR> {
let ExeDomain = GenericDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  def rr : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
                      (ins _src.RC:$src1, i32u8imm:$src2),
                      "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(set _dest.RC:$dst,
                        (X86any_cvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>,
                      Sched<[RR]>;
  let Constraints = "$src0 = $dst" in
  def rrk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
                       (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
                       "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                       [(set _dest.RC:$dst,
                         (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
                                       _dest.RC:$src0, _src.KRCWM:$mask))]>,
                       Sched<[RR]>, EVEX_K;
  def rrkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
                        (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
                        "vcvtps2ph\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}",
                        [(set _dest.RC:$dst,
                          (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
                                        _dest.ImmAllZerosV, _src.KRCWM:$mask))]>,
                        Sched<[RR]>, EVEX_KZ;
  let hasSideEffects = 0, mayStore = 1 in {
    def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
                        (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
                        "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
                        Sched<[MR]>;
    def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
                         (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
                         "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", []>,
                         EVEX_K, Sched<[MR]>, NotMemoryFoldable;
  }
}
}
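// The store forms above (mr/mrk) carry empty patterns (hasSideEffects = 0,
// mayStore = 1); stores of the converted result are instead matched by the
// explicit Pat<>s that follow the multiclass instantiations below.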
multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                               SchedWrite Sched> {
  let hasSideEffects = 0, Uses = [MXCSR] in
  defm rrb : AVX512_maskable_in_asm<0x1D, MRMDestReg, _dest,
                                    (outs _dest.RC:$dst),
                                    (ins _src.RC:$src1, i32u8imm:$src2),
                                    "vcvtps2ph", "$src2, {sae}, $src1",
                                    "$src1, {sae}, $src2", []>,
                                    EVEX_B, AVX512AIi8Base, Sched<[Sched]>;
}

let Predicates = [HasAVX512] in {
  defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem,
                                    WriteCvtPS2PHZ, WriteCvtPS2PHZSt>,
                    avx512_cvtps2ph_sae<v16i16x_info, v16f32_info, WriteCvtPS2PHZ>,
                    EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;

  def : Pat<(store (v16i16 (X86any_cvtps2ph VR512:$src1, timm:$src2)), addr:$dst),
            (VCVTPS2PHZmr addr:$dst, VR512:$src1, timm:$src2)>;
}

let Predicates = [HasVLX] in {
  defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem,
                                       WriteCvtPS2PHY, WriteCvtPS2PHYSt>,
                                       EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
  defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem,
                                       WriteCvtPS2PH, WriteCvtPS2PHSt>,
                                       EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;

  def : Pat<(store (f64 (extractelt
                         (bc_v2f64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))),
                         (iPTR 0))), addr:$dst),
            (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
  def : Pat<(store (i64 (extractelt
                         (bc_v2i64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))),
                         (iPTR 0))), addr:$dst),
            (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
  def : Pat<(store (v8i16 (X86any_cvtps2ph VR256X:$src1, timm:$src2)), addr:$dst),
            (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, timm:$src2)>;
}

// Unordered/Ordered scalar fp compare with Sae and set EFLAGS
multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
                              string OpcodeStr, Domain d,
                              X86FoldableSchedWrite sched = WriteFComX> {
  let hasSideEffects = 0, Uses = [MXCSR] in
  def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
                  !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"), []>,
                  EVEX, EVEX_B, VEX_LIG, EVEX_V128, Sched<[sched]>;
}

let Defs = [EFLAGS], Predicates = [HasAVX512] in {
  defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", SSEPackedSingle>,
                                      AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
  defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", SSEPackedDouble>,
                                      AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
  defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", SSEPackedSingle>,
                                     AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
  defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", SSEPackedDouble>,
                                     AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
}

let Defs = [EFLAGS], Predicates = [HasAVX512] in {
  defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86any_fcmp, f32, f32mem, loadf32,
                                 "ucomiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
                                 EVEX_CD8<32, CD8VT1>;
  defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86any_fcmp, f64, f64mem, loadf64,
                                 "ucomisd", SSEPackedDouble>, PD, EVEX,
                                 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
  defm VCOMISSZ : sse12_ord_cmp<0x2F, FR32X, X86strict_fcmps, f32, f32mem, loadf32,
                                "comiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
                                EVEX_CD8<32, CD8VT1>;
  defm VCOMISDZ : sse12_ord_cmp<0x2F, FR64X, X86strict_fcmps, f64, f64mem, loadf64,
                                "comisd", SSEPackedDouble>, PD, EVEX,
                                VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
  let isCodeGenOnly = 1 in {
    defm VUCOMISSZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
                                       sse_load_f32, "ucomiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
                                       EVEX_CD8<32, CD8VT1>;
    defm VUCOMISDZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
                                       sse_load_f64, "ucomisd", SSEPackedDouble>, PD, EVEX,
                                       VEX_LIG, VEX_W,
                                       EVEX_CD8<64, CD8VT1>;

    defm VCOMISSZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
                                      sse_load_f32, "comiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
                                      EVEX_CD8<32, CD8VT1>;
    defm VCOMISDZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
                                      sse_load_f64, "comisd", SSEPackedDouble>, PD, EVEX,
                                      VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
  }
}

/// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd
multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let Predicates = [HasAVX512], ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
  defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                                   "$src2, $src1", "$src1, $src2",
                                   (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
                                   EVEX_4V, VEX_LIG, Sched<[sched]>;
  defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                                   (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                                   "$src2, $src1", "$src1, $src2",
                                   (OpNode (_.VT _.RC:$src1),
                                           (_.ScalarIntMemFrags addr:$src2))>, EVEX_4V, VEX_LIG,
                                   Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}

defm VRCP14SSZ : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SchedWriteFRcp.Scl,
                               f32x_info>, EVEX_CD8<32, CD8VT1>,
                               T8PD;
defm VRCP14SDZ : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SchedWriteFRcp.Scl,
                               f64x_info>, VEX_W, EVEX_CD8<64, CD8VT1>,
                               T8PD;
defm VRSQRT14SSZ : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s,
                                 SchedWriteFRsqrt.Scl, f32x_info>,
                                 EVEX_CD8<32, CD8VT1>, T8PD;
defm VRSQRT14SDZ : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s,
                                 SchedWriteFRsqrt.Scl, f64x_info>, VEX_W,
                                 EVEX_CD8<64, CD8VT1>, T8PD;

/// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                          (ins _.RC:$src), OpcodeStr, "$src", "$src",
                          (_.VT (OpNode _.RC:$src))>, EVEX, T8PD,
                          Sched<[sched]>;
  defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                          (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
                          (OpNode (_.VT
                                   (bitconvert (_.LdFrag addr:$src))))>, EVEX, T8PD,
                          Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                           (ins _.ScalarMemOp:$src), OpcodeStr,
                           "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
                           (OpNode (_.VT
                                    (_.BroadcastLdFrag addr:$src)))>,
                           EVEX, T8PD, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
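// The VRCP14* and VRSQRT14* instructions defined via these multiclasses
// compute reciprocal and reciprocal square-root approximations with a
// maximum relative error of 2^-14, hence the "14" in the mnemonics.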
let Uses = [MXCSR] in
multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched> {
  defm PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"), OpNode, sched.ZMM,
                           v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"), OpNode, sched.ZMM,
                           v8f64_info>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
                                OpNode, sched.XMM, v4f32x_info>,
                                EVEX_V128, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
                                OpNode, sched.YMM, v8f32x_info>,
                                EVEX_V256, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
                                OpNode, sched.XMM, v2f64x_info>,
                                EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
                                OpNode, sched.YMM, v4f64x_info>,
                                EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
  }
}

defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86rsqrt14, SchedWriteFRsqrt>;
defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86rcp14, SchedWriteFRcp>;

/// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         SDNode OpNode, SDNode OpNodeSAE,
                         X86FoldableSchedWrite sched> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
  defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                                  "$src2, $src1", "$src1, $src2",
                                  (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
                                  Sched<[sched]>, SIMD_EXC;

  defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                                   "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                                   (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
                                   EVEX_B, Sched<[sched]>;

  defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                                  (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                                  "$src2, $src1", "$src1, $src2",
                                  (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2))>,
                                  Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  }
}

multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
                        SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
  defm SSZ : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, OpNodeSAE,
                           sched>, EVEX_CD8<32, CD8VT1>, VEX_LIG;
  defm SDZ : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, OpNodeSAE,
                           sched>, EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
}

let Predicates = [HasERI] in {
  defm VRCP28 : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, X86rcp28SAEs,
                             SchedWriteFRcp.Scl>, T8PD, EVEX_4V;
  defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, X86rsqrt28SAEs,
                               SchedWriteFRsqrt.Scl>, T8PD, EVEX_4V;
}

defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
                            SchedWriteFRnd.Scl>, T8PD, EVEX_4V;
/// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd

multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         SDNode OpNode, X86FoldableSchedWrite sched> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src), OpcodeStr, "$src", "$src",
                           (OpNode (_.VT _.RC:$src))>,
                           Sched<[sched]>;

  defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                           (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
                           (OpNode (_.VT
                                    (bitconvert (_.LdFrag addr:$src))))>,
                           Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                            (ins _.ScalarMemOp:$src), OpcodeStr,
                            "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
                            (OpNode (_.VT
                                     (_.BroadcastLdFrag addr:$src)))>,
                            EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
multiclass avx512_fp28_p_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                             SDNode OpNode, X86FoldableSchedWrite sched> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
  defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins _.RC:$src), OpcodeStr,
                            "{sae}, $src", "$src, {sae}",
                            (OpNode (_.VT _.RC:$src))>,
                            EVEX_B, Sched<[sched]>;
}

multiclass avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
                      SDNode OpNodeSAE, X86SchedWriteWidths sched> {
  defm PSZ : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>,
             avx512_fp28_p_sae<opc, OpcodeStr#"ps", v16f32_info, OpNodeSAE, sched.ZMM>,
             T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>,
             avx512_fp28_p_sae<opc, OpcodeStr#"pd", v8f64_info, OpNodeSAE, sched.ZMM>,
             T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
}

multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode, X86SchedWriteWidths sched> {
  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode,
                                sched.XMM>,
                                EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode,
                                sched.YMM>,
                                EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode,
                                sched.XMM>,
                                EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode,
                                sched.YMM>,
                                EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
  }
}

let Predicates = [HasERI] in {
  defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, X86rsqrt28SAE,
                             SchedWriteFRsqrt>, EVEX;
  defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28, X86rcp28SAE,
                           SchedWriteFRcp>, EVEX;
  defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2, X86exp2SAE,
                          SchedWriteFAdd>, EVEX;
}
defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
                          SchedWriteFRnd>,
               avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexp,
                                        SchedWriteFRnd>, EVEX;

multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
                                    X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  let ExeDomain = _.ExeDomain in
  defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc",
                           (_.VT (X86fsqrtRnd _.RC:$src, (i32 timm:$rc)))>,
                           EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
}
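// The rb form above takes an AVX512RC rounding-mode operand (EVEX_RC), which
// selects a static rounding mode such as {rn-sae}; in the encoding this
// reuses the vector-length bits, so it is only provided for the 512-bit
// forms.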
multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
                              X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm r: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                (ins _.RC:$src), OpcodeStr, "$src", "$src",
                                (_.VT (any_fsqrt _.RC:$src)),
                                (_.VT (fsqrt _.RC:$src))>, EVEX,
                                Sched<[sched]>;
  defm m: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
                                (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
                                (any_fsqrt (_.VT (_.LdFrag addr:$src))),
                                (fsqrt (_.VT (_.LdFrag addr:$src)))>, EVEX,
                                Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm mb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
                                 (ins _.ScalarMemOp:$src), OpcodeStr,
                                 "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
                                 (any_fsqrt (_.VT (_.BroadcastLdFrag addr:$src))),
                                 (fsqrt (_.VT (_.BroadcastLdFrag addr:$src)))>,
                                 EVEX, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

let Uses = [MXCSR], mayRaiseFPException = 1 in
multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
                                  X86SchedWriteSizes sched> {
  defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
                                sched.PS.ZMM, v16f32_info>,
                                EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
                                sched.PD.ZMM, v8f64_info>,
                                EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
                                     sched.PS.XMM, v4f32x_info>,
                                     EVEX_V128, PS, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
                                     sched.PS.YMM, v8f32x_info>,
                                     EVEX_V256, PS, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
                                     sched.PD.XMM, v2f64x_info>,
                                     EVEX_V128, VEX_W, PD, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
                                     sched.PD.YMM, v4f64x_info>,
                                     EVEX_V256, VEX_W, PD, EVEX_CD8<64, CD8VF>;
  }
}

let Uses = [MXCSR] in
multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
                                        X86SchedWriteSizes sched> {
  defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"),
                                      sched.PS.ZMM, v16f32_info>,
                                      EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"),
                                      sched.PD.ZMM, v8f64_info>,
                                      EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
}

multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
                              X86VectorVTInfo _, string Name> {
  let ExeDomain = _.ExeDomain in {
    defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                        (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                                        "$src2, $src1", "$src1, $src2",
                                        (X86fsqrts (_.VT _.RC:$src1),
                                                   (_.VT _.RC:$src2))>,
                                        Sched<[sched]>, SIMD_EXC;
    defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                                        (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                                        "$src2, $src1", "$src1, $src2",
                                        (X86fsqrts (_.VT _.RC:$src1),
                                                   (_.ScalarIntMemFrags addr:$src2))>,
                                        Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
    let Uses = [MXCSR] in
    defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                         (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
                                         "$rc, $src2, $src1", "$src1, $src2, $rc",
                                         (X86fsqrtRnds (_.VT _.RC:$src1),
                                                       (_.VT _.RC:$src2),
                                                       (i32 timm:$rc))>,
                                         EVEX_B, EVEX_RC, Sched<[sched]>;

    let isCodeGenOnly = 1, hasSideEffects = 0, Predicates=[HasAVX512] in {
      def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
                (ins _.FRC:$src1, _.FRC:$src2),
                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
                Sched<[sched]>, SIMD_EXC;
      let mayLoad = 1 in
      def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
                (ins _.FRC:$src1, _.ScalarMemOp:$src2),
                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
                Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
    }
  }

  let Predicates = [HasAVX512] in {
    def : Pat<(_.EltVT (any_fsqrt
                        _.FRC:$src)),
              (!cast<Instruction>(Name#Zr)
                  (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;
  }

  let Predicates = [HasAVX512, OptForSize] in {
    def : Pat<(_.EltVT (any_fsqrt (load addr:$src))),
              (!cast<Instruction>(Name#Zm)
                  (_.EltVT (IMPLICIT_DEF)), addr:$src)>;
  }
}

multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr,
                                  X86SchedWriteSizes sched> {
  defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", sched.PS.Scl, f32x_info, NAME#"SS">,
                                EVEX_CD8<32, CD8VT1>, EVEX_4V, XS;
  defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", sched.PD.Scl, f64x_info, NAME#"SD">,
                                EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W;
}

defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", SchedWriteFSqrtSizes>,
             avx512_sqrt_packed_all_round<0x51, "vsqrt", SchedWriteFSqrtSizes>;

defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, VEX_LIG;

multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                        (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
                                        "$src3, $src2, $src1", "$src1, $src2, $src3",
                                        (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                                            (i32 timm:$src3)))>,
                                        Sched<[sched]>, SIMD_EXC;

    let Uses = [MXCSR] in
    defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                         (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
                                         "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
                                         (_.VT (X86RndScalesSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                                                (i32 timm:$src3)))>, EVEX_B,
                                         Sched<[sched]>;

    defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                                        (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
                                        OpcodeStr,
                                        "$src3, $src2, $src1", "$src1, $src2, $src3",
                                        (_.VT (X86RndScales _.RC:$src1,
                                               (_.ScalarIntMemFrags addr:$src2), (i32 timm:$src3)))>,
                                        Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;

    let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in {
      def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
                (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
                OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                []>, Sched<[sched]>, SIMD_EXC;

      let mayLoad = 1 in
      def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
                (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
                OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                []>, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
    }
  }

  let Predicates = [HasAVX512] in {
    def : Pat<(X86any_VRndScale _.FRC:$src1, timm:$src2),
              (_.EltVT (!cast<Instruction>(NAME#r) (_.EltVT (IMPLICIT_DEF)),
                        _.FRC:$src1, timm:$src2))>;
  }

  let Predicates = [HasAVX512, OptForSize] in {
    def : Pat<(X86any_VRndScale (_.ScalarLdFrag addr:$src1), timm:$src2),
              (_.EltVT (!cast<Instruction>(NAME#m) (_.EltVT (IMPLICIT_DEF)),
                        addr:$src1, timm:$src2))>;
  }
}

defm VRNDSCALESSZ : avx512_rndscale_scalar<0x0A, "vrndscaless",
                                           SchedWriteFRnd.Scl, f32x_info>,
                                           AVX512AIi8Base, EVEX_4V, VEX_LIG,
                                           EVEX_CD8<32, CD8VT1>;

defm VRNDSCALESDZ : avx512_rndscale_scalar<0x0B, "vrndscalesd",
                                           SchedWriteFRnd.Scl, f64x_info>,
                                           VEX_W, AVX512AIi8Base, EVEX_4V, VEX_LIG,
                                           EVEX_CD8<64, CD8VT1>;
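// The helper below matches a scalar-move-wrapped select of a unary scalar
// op, roughly
//   (X86Movss $dst, (scalar_to_vector
//       (X86selects_mask $mask, (fsqrt (extractelt $src2, 0)),
//                        (extractelt $dst, 0))))
// and selects the masked scalar instruction directly (e.g. VSQRTSSZr_Intk
// for the fsqrt/"SQRTSSZ" instantiation), avoiding a separate blend of the
// masked-off lane.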
multiclass avx512_masked_scalar<SDNode OpNode, string OpcPrefix, SDNode Move,
                                dag Mask, X86VectorVTInfo _, PatLeaf ZeroFP,
                                dag OutMask, Predicate BasePredicate> {
  let Predicates = [BasePredicate] in {
    def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask,
                                 (OpNode (extractelt _.VT:$src2, (iPTR 0))),
                                 (extractelt _.VT:$dst, (iPTR 0))))),
              (!cast<Instruction>("V"#OpcPrefix#r_Intk)
                  _.VT:$dst, OutMask, _.VT:$src2, _.VT:$src1)>;

    def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask,
                                 (OpNode (extractelt _.VT:$src2, (iPTR 0))),
                                 ZeroFP))),
              (!cast<Instruction>("V"#OpcPrefix#r_Intkz)
                  OutMask, _.VT:$src2, _.VT:$src1)>;
  }
}

defm : avx512_masked_scalar<fsqrt, "SQRTSSZ", X86Movss,
                            (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v4f32x_info,
                            fp32imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
defm : avx512_masked_scalar<fsqrt, "SQRTSDZ", X86Movsd,
                            (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v2f64x_info,
                            fp64imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;


//-------------------------------------------------
// Integer truncate and extend operations
//-------------------------------------------------

// PatFrags that contain a select and a truncate op. They take operands in the
// same order as X86vmtrunc, X86vmtruncs, X86vmtruncus. This allows us to pass
// either to the multiclasses.
def select_trunc : PatFrag<(ops node:$src, node:$src0, node:$mask),
                           (vselect_mask node:$mask,
                                         (trunc node:$src), node:$src0)>;
def select_truncs : PatFrag<(ops node:$src, node:$src0, node:$mask),
                            (vselect_mask node:$mask,
                                          (X86vtruncs node:$src), node:$src0)>;
def select_truncus : PatFrag<(ops node:$src, node:$src0, node:$mask),
                             (vselect_mask node:$mask,
                                           (X86vtruncus node:$src), node:$src0)>;
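// In avx512_trunc_common below, the truncating-store forms (mr/mrk) carry no
// ISel patterns; avx512_trunc_mr_lowering supplies the Pat<>s that select
// them from the (masked) truncating-store fragments.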
AVX512XS8I<opc, MRMDestMem, (outs), 9189 (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src), 9190 OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", []>, 9191 EVEX, EVEX_K, Sched<[sched.Folded]>, NotMemoryFoldable; 9192 }//mayStore = 1, hasSideEffects = 0 9193} 9194 9195multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo, 9196 X86VectorVTInfo DestInfo, 9197 PatFrag truncFrag, PatFrag mtruncFrag, 9198 string Name> { 9199 9200 def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst), 9201 (!cast<Instruction>(Name#SrcInfo.ZSuffix#mr) 9202 addr:$dst, SrcInfo.RC:$src)>; 9203 9204 def : Pat<(mtruncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst, 9205 SrcInfo.KRCWM:$mask), 9206 (!cast<Instruction>(Name#SrcInfo.ZSuffix#mrk) 9207 addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>; 9208} 9209 9210multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128, 9211 SDNode OpNode256, SDNode OpNode512, 9212 SDPatternOperator MaskNode128, 9213 SDPatternOperator MaskNode256, 9214 SDPatternOperator MaskNode512, 9215 X86FoldableSchedWrite sched, 9216 AVX512VLVectorVTInfo VTSrcInfo, 9217 X86VectorVTInfo DestInfoZ128, 9218 X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ, 9219 X86MemOperand x86memopZ128, X86MemOperand x86memopZ256, 9220 X86MemOperand x86memopZ, PatFrag truncFrag, 9221 PatFrag mtruncFrag, Predicate prd = HasAVX512>{ 9222 9223 let Predicates = [HasVLX, prd] in { 9224 defm Z128: avx512_trunc_common<opc, OpcodeStr, OpNode128, MaskNode128, sched, 9225 VTSrcInfo.info128, DestInfoZ128, x86memopZ128>, 9226 avx512_trunc_mr_lowering<VTSrcInfo.info128, DestInfoZ128, 9227 truncFrag, mtruncFrag, NAME>, EVEX_V128; 9228 9229 defm Z256: avx512_trunc_common<opc, OpcodeStr, OpNode256, MaskNode256, sched, 9230 VTSrcInfo.info256, DestInfoZ256, x86memopZ256>, 9231 avx512_trunc_mr_lowering<VTSrcInfo.info256, DestInfoZ256, 9232 truncFrag, mtruncFrag, NAME>, EVEX_V256; 9233 } 9234 let Predicates = [prd] in 9235 defm Z: avx512_trunc_common<opc, OpcodeStr, OpNode512, MaskNode512, sched, 9236 VTSrcInfo.info512, DestInfoZ, x86memopZ>, 9237 avx512_trunc_mr_lowering<VTSrcInfo.info512, DestInfoZ, 9238 truncFrag, mtruncFrag, NAME>, EVEX_V512; 9239} 9240 9241multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr, SDNode OpNode, 9242 SDPatternOperator MaskNode, 9243 X86FoldableSchedWrite sched, PatFrag StoreNode, 9244 PatFrag MaskedStoreNode, SDNode InVecNode, 9245 SDPatternOperator InVecMaskNode> { 9246 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, InVecNode, 9247 InVecMaskNode, InVecMaskNode, InVecMaskNode, sched, 9248 avx512vl_i64_info, v16i8x_info, v16i8x_info, 9249 v16i8x_info, i16mem, i32mem, i64mem, StoreNode, 9250 MaskedStoreNode>, EVEX_CD8<8, CD8VO>; 9251} 9252 9253multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode, 9254 SDPatternOperator MaskNode, 9255 X86FoldableSchedWrite sched, PatFrag StoreNode, 9256 PatFrag MaskedStoreNode, SDNode InVecNode, 9257 SDPatternOperator InVecMaskNode> { 9258 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode, 9259 InVecMaskNode, InVecMaskNode, MaskNode, sched, 9260 avx512vl_i64_info, v8i16x_info, v8i16x_info, 9261 v8i16x_info, i32mem, i64mem, i128mem, StoreNode, 9262 MaskedStoreNode>, EVEX_CD8<16, CD8VQ>; 9263} 9264 9265multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode, 9266 SDPatternOperator MaskNode, 9267 X86FoldableSchedWrite sched, PatFrag StoreNode, 9268 PatFrag MaskedStoreNode, SDNode InVecNode, 9269 SDPatternOperator InVecMaskNode> { 9270 defm NAME: 
avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode, 9271 InVecMaskNode, MaskNode, MaskNode, sched, 9272 avx512vl_i64_info, v4i32x_info, v4i32x_info, 9273 v8i32x_info, i64mem, i128mem, i256mem, StoreNode, 9274 MaskedStoreNode>, EVEX_CD8<32, CD8VH>; 9275} 9276 9277multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode, 9278 SDPatternOperator MaskNode, 9279 X86FoldableSchedWrite sched, PatFrag StoreNode, 9280 PatFrag MaskedStoreNode, SDNode InVecNode, 9281 SDPatternOperator InVecMaskNode> { 9282 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode, 9283 InVecMaskNode, InVecMaskNode, MaskNode, sched, 9284 avx512vl_i32_info, v16i8x_info, v16i8x_info, 9285 v16i8x_info, i32mem, i64mem, i128mem, StoreNode, 9286 MaskedStoreNode>, EVEX_CD8<8, CD8VQ>; 9287} 9288 9289multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode, 9290 SDPatternOperator MaskNode, 9291 X86FoldableSchedWrite sched, PatFrag StoreNode, 9292 PatFrag MaskedStoreNode, SDNode InVecNode, 9293 SDPatternOperator InVecMaskNode> { 9294 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode, 9295 InVecMaskNode, MaskNode, MaskNode, sched, 9296 avx512vl_i32_info, v8i16x_info, v8i16x_info, 9297 v16i16x_info, i64mem, i128mem, i256mem, StoreNode, 9298 MaskedStoreNode>, EVEX_CD8<16, CD8VH>; 9299} 9300 9301multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode, 9302 SDPatternOperator MaskNode, 9303 X86FoldableSchedWrite sched, PatFrag StoreNode, 9304 PatFrag MaskedStoreNode, SDNode InVecNode, 9305 SDPatternOperator InVecMaskNode> { 9306 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode, 9307 InVecMaskNode, MaskNode, MaskNode, sched, 9308 avx512vl_i16_info, v16i8x_info, v16i8x_info, 9309 v32i8x_info, i64mem, i128mem, i256mem, StoreNode, 9310 MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>; 9311} 9312 9313defm VPMOVQB : avx512_trunc_qb<0x32, "vpmovqb", trunc, select_trunc, 9314 WriteShuffle256, truncstorevi8, 9315 masked_truncstorevi8, X86vtrunc, X86vmtrunc>; 9316defm VPMOVSQB : avx512_trunc_qb<0x22, "vpmovsqb", X86vtruncs, select_truncs, 9317 WriteShuffle256, truncstore_s_vi8, 9318 masked_truncstore_s_vi8, X86vtruncs, 9319 X86vmtruncs>; 9320defm VPMOVUSQB : avx512_trunc_qb<0x12, "vpmovusqb", X86vtruncus, 9321 select_truncus, WriteShuffle256, 9322 truncstore_us_vi8, masked_truncstore_us_vi8, 9323 X86vtruncus, X86vmtruncus>; 9324 9325defm VPMOVQW : avx512_trunc_qw<0x34, "vpmovqw", trunc, select_trunc, 9326 WriteShuffle256, truncstorevi16, 9327 masked_truncstorevi16, X86vtrunc, X86vmtrunc>; 9328defm VPMOVSQW : avx512_trunc_qw<0x24, "vpmovsqw", X86vtruncs, select_truncs, 9329 WriteShuffle256, truncstore_s_vi16, 9330 masked_truncstore_s_vi16, X86vtruncs, 9331 X86vmtruncs>; 9332defm VPMOVUSQW : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus, 9333 select_truncus, WriteShuffle256, 9334 truncstore_us_vi16, masked_truncstore_us_vi16, 9335 X86vtruncus, X86vmtruncus>; 9336 9337defm VPMOVQD : avx512_trunc_qd<0x35, "vpmovqd", trunc, select_trunc, 9338 WriteShuffle256, truncstorevi32, 9339 masked_truncstorevi32, X86vtrunc, X86vmtrunc>; 9340defm VPMOVSQD : avx512_trunc_qd<0x25, "vpmovsqd", X86vtruncs, select_truncs, 9341 WriteShuffle256, truncstore_s_vi32, 9342 masked_truncstore_s_vi32, X86vtruncs, 9343 X86vmtruncs>; 9344defm VPMOVUSQD : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus, 9345 select_truncus, WriteShuffle256, 9346 truncstore_us_vi32, masked_truncstore_us_vi32, 9347 X86vtruncus, X86vmtruncus>; 9348 9349defm VPMOVDB : avx512_trunc_db<0x31, "vpmovdb", trunc, 
select_trunc, 9350 WriteShuffle256, truncstorevi8, 9351 masked_truncstorevi8, X86vtrunc, X86vmtrunc>; 9352defm VPMOVSDB : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, select_truncs, 9353 WriteShuffle256, truncstore_s_vi8, 9354 masked_truncstore_s_vi8, X86vtruncs, 9355 X86vmtruncs>; 9356defm VPMOVUSDB : avx512_trunc_db<0x11, "vpmovusdb", X86vtruncus, 9357 select_truncus, WriteShuffle256, 9358 truncstore_us_vi8, masked_truncstore_us_vi8, 9359 X86vtruncus, X86vmtruncus>; 9360 9361defm VPMOVDW : avx512_trunc_dw<0x33, "vpmovdw", trunc, select_trunc, 9362 WriteShuffle256, truncstorevi16, 9363 masked_truncstorevi16, X86vtrunc, X86vmtrunc>; 9364defm VPMOVSDW : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, select_truncs, 9365 WriteShuffle256, truncstore_s_vi16, 9366 masked_truncstore_s_vi16, X86vtruncs, 9367 X86vmtruncs>; 9368defm VPMOVUSDW : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus, 9369 select_truncus, WriteShuffle256, 9370 truncstore_us_vi16, masked_truncstore_us_vi16, 9371 X86vtruncus, X86vmtruncus>; 9372 9373defm VPMOVWB : avx512_trunc_wb<0x30, "vpmovwb", trunc, select_trunc, 9374 WriteShuffle256, truncstorevi8, 9375 masked_truncstorevi8, X86vtrunc, 9376 X86vmtrunc>; 9377defm VPMOVSWB : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, select_truncs, 9378 WriteShuffle256, truncstore_s_vi8, 9379 masked_truncstore_s_vi8, X86vtruncs, 9380 X86vmtruncs>; 9381defm VPMOVUSWB : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus, 9382 select_truncus, WriteShuffle256, 9383 truncstore_us_vi8, masked_truncstore_us_vi8, 9384 X86vtruncus, X86vmtruncus>; 9385 9386let Predicates = [HasAVX512, NoVLX] in { 9387def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))), 9388 (v8i16 (EXTRACT_SUBREG 9389 (v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), 9390 VR256X:$src, sub_ymm)))), sub_xmm))>; 9391def: Pat<(v4i32 (trunc (v4i64 VR256X:$src))), 9392 (v4i32 (EXTRACT_SUBREG 9393 (v8i32 (VPMOVQDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), 9394 VR256X:$src, sub_ymm)))), sub_xmm))>; 9395} 9396 9397let Predicates = [HasBWI, NoVLX] in { 9398def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))), 9399 (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF), 9400 VR256X:$src, sub_ymm))), sub_xmm))>; 9401} 9402 9403// Without BWI we can't use vXi16/vXi8 vselect so we have to use vmtrunc nodes. 
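// As a sketch of the mapping the multiclass below performs (operand order
// follows the select_trunc PatFrags above: src, src0, mask), a masked
// narrowing such as
//   (v8i16 (X86vmtrunc (v8i32 VR256X:$src), VR128X:$src0, VK8WM:$mask))
// selects to VPMOVDWZ256rrk, which truncates only the lanes enabled by
// $mask and passes $src0 through in the disabled lanes.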
9404multiclass mtrunc_lowering<string InstrName, SDNode OpNode, 9405 X86VectorVTInfo DestInfo, 9406 X86VectorVTInfo SrcInfo> { 9407 def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src), 9408 DestInfo.RC:$src0, 9409 SrcInfo.KRCWM:$mask)), 9410 (!cast<Instruction>(InstrName#"rrk") DestInfo.RC:$src0, 9411 SrcInfo.KRCWM:$mask, 9412 SrcInfo.RC:$src)>; 9413 9414 def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src), 9415 DestInfo.ImmAllZerosV, 9416 SrcInfo.KRCWM:$mask)), 9417 (!cast<Instruction>(InstrName#"rrkz") SrcInfo.KRCWM:$mask, 9418 SrcInfo.RC:$src)>; 9419} 9420 9421let Predicates = [HasVLX] in { 9422defm : mtrunc_lowering<"VPMOVDWZ256", X86vmtrunc, v8i16x_info, v8i32x_info>; 9423defm : mtrunc_lowering<"VPMOVSDWZ256", X86vmtruncs, v8i16x_info, v8i32x_info>; 9424defm : mtrunc_lowering<"VPMOVUSDWZ256", X86vmtruncus, v8i16x_info, v8i32x_info>; 9425} 9426 9427let Predicates = [HasAVX512] in { 9428defm : mtrunc_lowering<"VPMOVDWZ", X86vmtrunc, v16i16x_info, v16i32_info>; 9429defm : mtrunc_lowering<"VPMOVSDWZ", X86vmtruncs, v16i16x_info, v16i32_info>; 9430defm : mtrunc_lowering<"VPMOVUSDWZ", X86vmtruncus, v16i16x_info, v16i32_info>; 9431 9432defm : mtrunc_lowering<"VPMOVDBZ", X86vmtrunc, v16i8x_info, v16i32_info>; 9433defm : mtrunc_lowering<"VPMOVSDBZ", X86vmtruncs, v16i8x_info, v16i32_info>; 9434defm : mtrunc_lowering<"VPMOVUSDBZ", X86vmtruncus, v16i8x_info, v16i32_info>; 9435 9436defm : mtrunc_lowering<"VPMOVQWZ", X86vmtrunc, v8i16x_info, v8i64_info>; 9437defm : mtrunc_lowering<"VPMOVSQWZ", X86vmtruncs, v8i16x_info, v8i64_info>; 9438defm : mtrunc_lowering<"VPMOVUSQWZ", X86vmtruncus, v8i16x_info, v8i64_info>; 9439} 9440 9441multiclass WriteShuffle256_common<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched, 9442 X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo, 9443 X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode>{ 9444 let ExeDomain = DestInfo.ExeDomain in { 9445 defm rr : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst), 9446 (ins SrcInfo.RC:$src), OpcodeStr ,"$src", "$src", 9447 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src)))>, 9448 EVEX, Sched<[sched]>; 9449 9450 defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst), 9451 (ins x86memop:$src), OpcodeStr ,"$src", "$src", 9452 (DestInfo.VT (LdFrag addr:$src))>, 9453 EVEX, Sched<[sched.Folded]>; 9454 } 9455} 9456 9457multiclass WriteShuffle256_BW<bits<8> opc, string OpcodeStr, 9458 SDNode OpNode, SDNode InVecNode, string ExtTy, 9459 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> { 9460 let Predicates = [HasVLX, HasBWI] in { 9461 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v8i16x_info, 9462 v16i8x_info, i64mem, LdFrag, InVecNode>, 9463 EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128, VEX_WIG; 9464 9465 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v16i16x_info, 9466 v16i8x_info, i128mem, LdFrag, OpNode>, 9467 EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256, VEX_WIG; 9468 } 9469 let Predicates = [HasBWI] in { 9470 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v32i16_info, 9471 v32i8x_info, i256mem, LdFrag, OpNode>, 9472 EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512, VEX_WIG; 9473 } 9474} 9475 9476multiclass WriteShuffle256_BD<bits<8> opc, string OpcodeStr, 9477 SDNode OpNode, SDNode InVecNode, string ExtTy, 9478 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> { 9479 let Predicates = [HasVLX, HasAVX512] in { 9480 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v4i32x_info, 9481 v16i8x_info, i32mem, 
LdFrag, InVecNode>, 9482 EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128, VEX_WIG; 9483 9484 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info, 9485 v16i8x_info, i64mem, LdFrag, InVecNode>, 9486 EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256, VEX_WIG; 9487 } 9488 let Predicates = [HasAVX512] in { 9489 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v16i32_info, 9490 v16i8x_info, i128mem, LdFrag, OpNode>, 9491 EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512, VEX_WIG; 9492 } 9493} 9494 9495multiclass WriteShuffle256_BQ<bits<8> opc, string OpcodeStr, 9496 SDNode OpNode, SDNode InVecNode, string ExtTy, 9497 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> { 9498 let Predicates = [HasVLX, HasAVX512] in { 9499 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info, 9500 v16i8x_info, i16mem, LdFrag, InVecNode>, 9501 EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128, VEX_WIG; 9502 9503 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info, 9504 v16i8x_info, i32mem, LdFrag, InVecNode>, 9505 EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256, VEX_WIG; 9506 } 9507 let Predicates = [HasAVX512] in { 9508 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info, 9509 v16i8x_info, i64mem, LdFrag, InVecNode>, 9510 EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512, VEX_WIG; 9511 } 9512} 9513 9514multiclass WriteShuffle256_WD<bits<8> opc, string OpcodeStr, 9515 SDNode OpNode, SDNode InVecNode, string ExtTy, 9516 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> { 9517 let Predicates = [HasVLX, HasAVX512] in { 9518 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v4i32x_info, 9519 v8i16x_info, i64mem, LdFrag, InVecNode>, 9520 EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128, VEX_WIG; 9521 9522 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info, 9523 v8i16x_info, i128mem, LdFrag, OpNode>, 9524 EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256, VEX_WIG; 9525 } 9526 let Predicates = [HasAVX512] in { 9527 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v16i32_info, 9528 v16i16x_info, i256mem, LdFrag, OpNode>, 9529 EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512, VEX_WIG; 9530 } 9531} 9532 9533multiclass WriteShuffle256_WQ<bits<8> opc, string OpcodeStr, 9534 SDNode OpNode, SDNode InVecNode, string ExtTy, 9535 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> { 9536 let Predicates = [HasVLX, HasAVX512] in { 9537 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info, 9538 v8i16x_info, i32mem, LdFrag, InVecNode>, 9539 EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128, VEX_WIG; 9540 9541 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info, 9542 v8i16x_info, i64mem, LdFrag, InVecNode>, 9543 EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256, VEX_WIG; 9544 } 9545 let Predicates = [HasAVX512] in { 9546 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info, 9547 v8i16x_info, i128mem, LdFrag, OpNode>, 9548 EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512, VEX_WIG; 9549 } 9550} 9551 9552multiclass WriteShuffle256_DQ<bits<8> opc, string OpcodeStr, 9553 SDNode OpNode, SDNode InVecNode, string ExtTy, 9554 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> { 9555 9556 let Predicates = [HasVLX, HasAVX512] in { 9557 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info, 9558 v4i32x_info, i64mem, LdFrag, InVecNode>, 9559 EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128; 9560 9561 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info, 9562 v4i32x_info, i128mem, LdFrag, OpNode>, 9563 EVEX_CD8<32, 
CD8VH>, T8PD, EVEX_V256;
  }
  let Predicates = [HasAVX512] in {
    defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
                                    v8i32x_info, i256mem, LdFrag, OpNode>,
                                    EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512;
  }
}

defm VPMOVZXBW : WriteShuffle256_BW<0x30, "vpmovzxbw", zext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXBD : WriteShuffle256_BD<0x31, "vpmovzxbd", zext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXBQ : WriteShuffle256_BQ<0x32, "vpmovzxbq", zext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXWD : WriteShuffle256_WD<0x33, "vpmovzxwd", zext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXWQ : WriteShuffle256_WQ<0x34, "vpmovzxwq", zext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXDQ : WriteShuffle256_DQ<0x35, "vpmovzxdq", zext, zext_invec, "z", WriteShuffle256>;

defm VPMOVSXBW: WriteShuffle256_BW<0x20, "vpmovsxbw", sext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXBD: WriteShuffle256_BD<0x21, "vpmovsxbd", sext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXBQ: WriteShuffle256_BQ<0x22, "vpmovsxbq", sext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXWD: WriteShuffle256_WD<0x23, "vpmovsxwd", sext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXWQ: WriteShuffle256_WQ<0x24, "vpmovsxwq", sext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXDQ: WriteShuffle256_DQ<0x25, "vpmovsxdq", sext, sext_invec, "s", WriteShuffle256>;


// Patterns for which we also need any-extend versions; aext_vector_inreg
// is currently legalized to zext_vector_inreg.
multiclass AVX512_pmovx_patterns_base<string OpcPrefix, SDNode ExtOp> {
  // 256-bit patterns
  let Predicates = [HasVLX, HasBWI] in {
    def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))),
              (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
  }

  let Predicates = [HasVLX] in {
    def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))),
              (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;

    def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
              (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
  }

  // 512-bit patterns
  let Predicates = [HasBWI] in {
    def : Pat<(v32i16 (ExtOp (loadv32i8 addr:$src))),
              (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
  }
  let Predicates = [HasAVX512] in {
    def : Pat<(v16i32 (ExtOp (loadv16i8 addr:$src))),
              (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;
    def : Pat<(v16i32 (ExtOp (loadv16i16 addr:$src))),
              (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;

    def : Pat<(v8i64 (ExtOp (loadv8i16 addr:$src))),
              (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;

    def : Pat<(v8i64 (ExtOp (loadv8i32 addr:$src))),
              (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
  }
}

multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
                                 SDNode InVecOp> :
  AVX512_pmovx_patterns_base<OpcPrefix, ExtOp> {
  // 128-bit patterns
  let Predicates = [HasVLX, HasBWI] in {
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  }
  let Predicates = [HasVLX] in {
  def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;

  def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;

  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;

  def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (X86vzload32 addr:$src))))),
            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;

  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  }
  let Predicates = [HasVLX] in {
  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;

  def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;

  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  }
  // 512-bit patterns
  let Predicates = [HasAVX512] in {
  def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
  def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
  def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
  }
}

defm : AVX512_pmovx_patterns<"VPMOVSX", sext, sext_invec>;
defm : AVX512_pmovx_patterns<"VPMOVZX", zext, zext_invec>;

// Without BWI we can't do a trunc from v16i16 to v16i8. DAG combine can merge
// ext+trunc aggressively, making it impossible to legalize the DAG to this
// pattern directly.
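// The workaround below instead goes through 32-bit elements, using only
// instructions already defined above (a sketch of the selected sequence):
//   v16i16 --vpmovzxwd--> v16i32 --vpmovdb--> v16i8
// i.e. the trunc is matched as VPMOVDBZrr(VPMOVZXWDZrr(src)) at the cost of
// an extra zero extend.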
9700let Predicates = [HasAVX512, NoBWI] in { 9701def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))), 9702 (VPMOVDBZrr (v16i32 (VPMOVZXWDZrr VR256X:$src)))>; 9703def: Pat<(v16i8 (trunc (loadv16i16 addr:$src))), 9704 (VPMOVDBZrr (v16i32 (VPMOVZXWDZrm addr:$src)))>; 9705} 9706 9707//===----------------------------------------------------------------------===// 9708// GATHER - SCATTER Operations 9709 9710// FIXME: Improve scheduling of gather/scatter instructions. 9711multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, 9712 X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> { 9713 let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb", 9714 ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in 9715 def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, MaskRC:$mask_wb), 9716 (ins _.RC:$src1, MaskRC:$mask, memop:$src2), 9717 !strconcat(OpcodeStr#_.Suffix, 9718 "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"), 9719 []>, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteLoad]>; 9720} 9721 9722multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc, 9723 AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> { 9724 defm NAME#D#SUFF#Z: avx512_gather<dopc, OpcodeStr#"d", _.info512, 9725 vy512xmem>, EVEX_V512, VEX_W; 9726 defm NAME#Q#SUFF#Z: avx512_gather<qopc, OpcodeStr#"q", _.info512, 9727 vz512mem>, EVEX_V512, VEX_W; 9728let Predicates = [HasVLX] in { 9729 defm NAME#D#SUFF#Z256: avx512_gather<dopc, OpcodeStr#"d", _.info256, 9730 vx256xmem>, EVEX_V256, VEX_W; 9731 defm NAME#Q#SUFF#Z256: avx512_gather<qopc, OpcodeStr#"q", _.info256, 9732 vy256xmem>, EVEX_V256, VEX_W; 9733 defm NAME#D#SUFF#Z128: avx512_gather<dopc, OpcodeStr#"d", _.info128, 9734 vx128xmem>, EVEX_V128, VEX_W; 9735 defm NAME#Q#SUFF#Z128: avx512_gather<qopc, OpcodeStr#"q", _.info128, 9736 vx128xmem>, EVEX_V128, VEX_W; 9737} 9738} 9739 9740multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc, 9741 AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> { 9742 defm NAME#D#SUFF#Z: avx512_gather<dopc, OpcodeStr#"d", _.info512, vz512mem>, 9743 EVEX_V512; 9744 defm NAME#Q#SUFF#Z: avx512_gather<qopc, OpcodeStr#"q", _.info256, vz256mem>, 9745 EVEX_V512; 9746let Predicates = [HasVLX] in { 9747 defm NAME#D#SUFF#Z256: avx512_gather<dopc, OpcodeStr#"d", _.info256, 9748 vy256xmem>, EVEX_V256; 9749 defm NAME#Q#SUFF#Z256: avx512_gather<qopc, OpcodeStr#"q", _.info128, 9750 vy128xmem>, EVEX_V256; 9751 defm NAME#D#SUFF#Z128: avx512_gather<dopc, OpcodeStr#"d", _.info128, 9752 vx128xmem>, EVEX_V128; 9753 defm NAME#Q#SUFF#Z128: avx512_gather<qopc, OpcodeStr#"q", _.info128, 9754 vx64xmem, VK2WM>, EVEX_V128; 9755} 9756} 9757 9758 9759defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">, 9760 avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">; 9761 9762defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">, 9763 avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">; 9764 9765multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, 9766 X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> { 9767 9768let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain, 9769 hasSideEffects = 0 in 9770 9771 def mr : AVX5128I<opc, MRMDestMem, (outs MaskRC:$mask_wb), 9772 (ins memop:$dst, MaskRC:$mask, _.RC:$src), 9773 !strconcat(OpcodeStr#_.Suffix, 9774 "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"), 9775 []>, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>, 9776 Sched<[WriteStore]>; 9777} 9778 
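// Note on the "$mask = $mask_wb" constraint above: AVX-512 gather/scatter
// clears each mask bit as the corresponding element completes, so the mask
// register is both an input and an output. After a fault the instruction
// can simply be re-executed and only the still-set elements are retried;
// on normal completion the mask is left all zero, e.g. after
//   vpscatterdd %zmm0, (%rax,%zmm1,4) {%k1}
// %k1 reads as zero.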
9779multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc, 9780 AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> { 9781 defm NAME#D#SUFF#Z: avx512_scatter<dopc, OpcodeStr#"d", _.info512, 9782 vy512xmem>, EVEX_V512, VEX_W; 9783 defm NAME#Q#SUFF#Z: avx512_scatter<qopc, OpcodeStr#"q", _.info512, 9784 vz512mem>, EVEX_V512, VEX_W; 9785let Predicates = [HasVLX] in { 9786 defm NAME#D#SUFF#Z256: avx512_scatter<dopc, OpcodeStr#"d", _.info256, 9787 vx256xmem>, EVEX_V256, VEX_W; 9788 defm NAME#Q#SUFF#Z256: avx512_scatter<qopc, OpcodeStr#"q", _.info256, 9789 vy256xmem>, EVEX_V256, VEX_W; 9790 defm NAME#D#SUFF#Z128: avx512_scatter<dopc, OpcodeStr#"d", _.info128, 9791 vx128xmem>, EVEX_V128, VEX_W; 9792 defm NAME#Q#SUFF#Z128: avx512_scatter<qopc, OpcodeStr#"q", _.info128, 9793 vx128xmem>, EVEX_V128, VEX_W; 9794} 9795} 9796 9797multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc, 9798 AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> { 9799 defm NAME#D#SUFF#Z: avx512_scatter<dopc, OpcodeStr#"d", _.info512, vz512mem>, 9800 EVEX_V512; 9801 defm NAME#Q#SUFF#Z: avx512_scatter<qopc, OpcodeStr#"q", _.info256, vz256mem>, 9802 EVEX_V512; 9803let Predicates = [HasVLX] in { 9804 defm NAME#D#SUFF#Z256: avx512_scatter<dopc, OpcodeStr#"d", _.info256, 9805 vy256xmem>, EVEX_V256; 9806 defm NAME#Q#SUFF#Z256: avx512_scatter<qopc, OpcodeStr#"q", _.info128, 9807 vy128xmem>, EVEX_V256; 9808 defm NAME#D#SUFF#Z128: avx512_scatter<dopc, OpcodeStr#"d", _.info128, 9809 vx128xmem>, EVEX_V128; 9810 defm NAME#Q#SUFF#Z128: avx512_scatter<qopc, OpcodeStr#"q", _.info128, 9811 vx64xmem, VK2WM>, EVEX_V128; 9812} 9813} 9814 9815defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">, 9816 avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">; 9817 9818defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">, 9819 avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">; 9820 9821// prefetch 9822multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr, 9823 RegisterClass KRC, X86MemOperand memop> { 9824 let Predicates = [HasPFI], mayLoad = 1, mayStore = 1 in 9825 def m : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src), 9826 !strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"), []>, 9827 EVEX, EVEX_K, Sched<[WriteLoad]>; 9828} 9829 9830defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps", 9831 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>; 9832 9833defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps", 9834 VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>; 9835 9836defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd", 9837 VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>; 9838 9839defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd", 9840 VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; 9841 9842defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps", 9843 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>; 9844 9845defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps", 9846 VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>; 9847 9848defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd", 9849 VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>; 9850 9851defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd", 9852 VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, 
CD8VT1>; 9853 9854defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps", 9855 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>; 9856 9857defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps", 9858 VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>; 9859 9860defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd", 9861 VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>; 9862 9863defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd", 9864 VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; 9865 9866defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps", 9867 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>; 9868 9869defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps", 9870 VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>; 9871 9872defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd", 9873 VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>; 9874 9875defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd", 9876 VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; 9877 9878multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr > { 9879def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src), 9880 !strconcat(OpcodeStr#Vec.Suffix, "\t{$src, $dst|$dst, $src}"), 9881 [(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))]>, 9882 EVEX, Sched<[WriteMove]>; // TODO - WriteVecTrunc? 9883} 9884 9885multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo, 9886 string OpcodeStr, Predicate prd> { 9887let Predicates = [prd] in 9888 defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr>, EVEX_V512; 9889 9890 let Predicates = [prd, HasVLX] in { 9891 defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr>, EVEX_V256; 9892 defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr>, EVEX_V128; 9893 } 9894} 9895 9896defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2" , HasBWI>; 9897defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI> , VEX_W; 9898defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>; 9899defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI> , VEX_W; 9900 9901multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr > { 9902 def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src), 9903 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 9904 [(set _.KRC:$dst, (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src)))]>, 9905 EVEX, Sched<[WriteMove]>; 9906} 9907 9908// Use 512bit version to implement 128/256 bit in case NoVLX. 
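// A sketch of that widening (using only records defined in this file): for
// a v16i8 compare without VLX, the pattern below inserts the 128-bit source
// into an undef 512-bit register, runs the 512-bit instruction, and copies
// the result into the narrow mask class:
//   (COPY_TO_REGCLASS
//     (VPMOVB2MZrr (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)),
//                                 VR128X:$src, sub_xmm)), VK16)
// The upper mask bits are undef, but only the low 16 are ever read.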
9909multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo, 9910 X86VectorVTInfo _, 9911 string Name> { 9912 9913 def : Pat<(_.KVT (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src))), 9914 (_.KVT (COPY_TO_REGCLASS 9915 (!cast<Instruction>(Name#"Zrr") 9916 (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)), 9917 _.RC:$src, _.SubRegIdx)), 9918 _.KRC))>; 9919} 9920 9921multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr, 9922 AVX512VLVectorVTInfo VTInfo, Predicate prd> { 9923 let Predicates = [prd] in 9924 defm Z : convert_vector_to_mask_common <opc, VTInfo.info512, OpcodeStr>, 9925 EVEX_V512; 9926 9927 let Predicates = [prd, HasVLX] in { 9928 defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256, OpcodeStr>, 9929 EVEX_V256; 9930 defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>, 9931 EVEX_V128; 9932 } 9933 let Predicates = [prd, NoVLX] in { 9934 defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info256, NAME>; 9935 defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info128, NAME>; 9936 } 9937} 9938 9939defm VPMOVB2M : avx512_convert_vector_to_mask<0x29, "vpmovb2m", 9940 avx512vl_i8_info, HasBWI>; 9941defm VPMOVW2M : avx512_convert_vector_to_mask<0x29, "vpmovw2m", 9942 avx512vl_i16_info, HasBWI>, VEX_W; 9943defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m", 9944 avx512vl_i32_info, HasDQI>; 9945defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m", 9946 avx512vl_i64_info, HasDQI>, VEX_W; 9947 9948// Patterns for handling sext from a mask register to v16i8/v16i16 when DQI 9949// is available, but BWI is not. We can't handle this in lowering because 9950// a target independent DAG combine likes to combine sext and trunc. 9951let Predicates = [HasDQI, NoBWI] in { 9952 def : Pat<(v16i8 (sext (v16i1 VK16:$src))), 9953 (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>; 9954 def : Pat<(v16i16 (sext (v16i1 VK16:$src))), 9955 (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>; 9956} 9957 9958let Predicates = [HasDQI, NoBWI, HasVLX] in { 9959 def : Pat<(v8i16 (sext (v8i1 VK8:$src))), 9960 (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>; 9961} 9962 9963//===----------------------------------------------------------------------===// 9964// AVX-512 - COMPRESS and EXPAND 9965// 9966 9967multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _, 9968 string OpcodeStr, X86FoldableSchedWrite sched> { 9969 defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst), 9970 (ins _.RC:$src1), OpcodeStr, "$src1", "$src1", 9971 (null_frag)>, AVX5128IBase, 9972 Sched<[sched]>; 9973 9974 let mayStore = 1, hasSideEffects = 0 in 9975 def mr : AVX5128I<opc, MRMDestMem, (outs), 9976 (ins _.MemOp:$dst, _.RC:$src), 9977 OpcodeStr # "\t{$src, $dst|$dst, $src}", 9978 []>, EVEX_CD8<_.EltSize, CD8VT1>, 9979 Sched<[sched.Folded]>; 9980 9981 def mrk : AVX5128I<opc, MRMDestMem, (outs), 9982 (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src), 9983 OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", 9984 []>, 9985 EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>, 9986 Sched<[sched.Folded]>; 9987} 9988 9989multiclass compress_by_vec_width_lowering<X86VectorVTInfo _, string Name> { 9990 def : Pat<(X86mCompressingStore (_.VT _.RC:$src), addr:$dst, _.KRCWM:$mask), 9991 (!cast<Instruction>(Name#_.ZSuffix#mrk) 9992 addr:$dst, _.KRCWM:$mask, _.RC:$src)>; 9993 9994 def : Pat<(X86compress (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask), 9995 (!cast<Instruction>(Name#_.ZSuffix#rrk) 9996 _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>; 
9997 def : Pat<(X86compress (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask), 9998 (!cast<Instruction>(Name#_.ZSuffix#rrkz) 9999 _.KRCWM:$mask, _.RC:$src)>; 10000} 10001 10002multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr, 10003 X86FoldableSchedWrite sched, 10004 AVX512VLVectorVTInfo VTInfo, 10005 Predicate Pred = HasAVX512> { 10006 let Predicates = [Pred] in 10007 defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr, sched>, 10008 compress_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512; 10009 10010 let Predicates = [Pred, HasVLX] in { 10011 defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr, sched>, 10012 compress_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256; 10013 defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr, sched>, 10014 compress_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128; 10015 } 10016} 10017 10018// FIXME: Is there a better scheduler class for VPCOMPRESS? 10019defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", WriteVarShuffle256, 10020 avx512vl_i32_info>, EVEX, NotMemoryFoldable; 10021defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", WriteVarShuffle256, 10022 avx512vl_i64_info>, EVEX, VEX_W, NotMemoryFoldable; 10023defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", WriteVarShuffle256, 10024 avx512vl_f32_info>, EVEX, NotMemoryFoldable; 10025defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", WriteVarShuffle256, 10026 avx512vl_f64_info>, EVEX, VEX_W, NotMemoryFoldable; 10027 10028// expand 10029multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _, 10030 string OpcodeStr, X86FoldableSchedWrite sched> { 10031 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 10032 (ins _.RC:$src1), OpcodeStr, "$src1", "$src1", 10033 (null_frag)>, AVX5128IBase, 10034 Sched<[sched]>; 10035 10036 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 10037 (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1", 10038 (null_frag)>, 10039 AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>, 10040 Sched<[sched.Folded, sched.ReadAfterFold]>; 10041} 10042 10043multiclass expand_by_vec_width_lowering<X86VectorVTInfo _, string Name> { 10044 10045 def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)), 10046 (!cast<Instruction>(Name#_.ZSuffix#rmkz) 10047 _.KRCWM:$mask, addr:$src)>; 10048 10049 def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, _.ImmAllZerosV)), 10050 (!cast<Instruction>(Name#_.ZSuffix#rmkz) 10051 _.KRCWM:$mask, addr:$src)>; 10052 10053 def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, 10054 (_.VT _.RC:$src0))), 10055 (!cast<Instruction>(Name#_.ZSuffix#rmk) 10056 _.RC:$src0, _.KRCWM:$mask, addr:$src)>; 10057 10058 def : Pat<(X86expand (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask), 10059 (!cast<Instruction>(Name#_.ZSuffix#rrk) 10060 _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>; 10061 def : Pat<(X86expand (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask), 10062 (!cast<Instruction>(Name#_.ZSuffix#rrkz) 10063 _.KRCWM:$mask, _.RC:$src)>; 10064} 10065 10066multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr, 10067 X86FoldableSchedWrite sched, 10068 AVX512VLVectorVTInfo VTInfo, 10069 Predicate Pred = HasAVX512> { 10070 let Predicates = [Pred] in 10071 defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr, sched>, 10072 expand_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512; 10073 10074 let Predicates = [Pred, HasVLX] in { 10075 defm Z256 : expand_by_vec_width<opc, VTInfo.info256, 
OpcodeStr, sched>,
                expand_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr, sched>,
                expand_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
  }
}

// FIXME: Is there a better scheduler class for VPEXPAND?
defm VPEXPANDD : expand_by_elt_width <0x89, "vpexpandd", WriteVarShuffle256,
                                      avx512vl_i32_info>, EVEX;
defm VPEXPANDQ : expand_by_elt_width <0x89, "vpexpandq", WriteVarShuffle256,
                                      avx512vl_i64_info>, EVEX, VEX_W;
defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", WriteVarShuffle256,
                                      avx512vl_f32_info>, EVEX;
defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", WriteVarShuffle256,
                                      avx512vl_f64_info>, EVEX, VEX_W;

// Handle instruction  reg_vec1 = op(reg_vec, imm)
//                                op(mem_vec, imm)
//                                op(broadcast(eltVt), imm)
// All instructions are created with FROUND_CURRENT.
multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr,
                                      SDNode OpNode, SDNode MaskOpNode,
                                      X86FoldableSchedWrite sched,
                                      X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
    defm rri : AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2",
                      (OpNode (_.VT _.RC:$src1), (i32 timm:$src2)),
                      (MaskOpNode (_.VT _.RC:$src1), (i32 timm:$src2))>,
                      Sched<[sched]>;
    defm rmi : AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.MemOp:$src1, i32u8imm:$src2),
                    OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2",
                    (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
                            (i32 timm:$src2)),
                    (MaskOpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
                                (i32 timm:$src2))>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
    defm rmbi : AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
                    OpcodeStr#_.Suffix, "$src2, ${src1}"#_.BroadcastStr,
                    "${src1}"#_.BroadcastStr#", $src2",
                    (OpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
                            (i32 timm:$src2)),
                    (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
                                (i32 timm:$src2))>, EVEX_B,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Handle instruction  reg_vec1 = op(reg_vec2, imm), {sae}
multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
                                          SDNode OpNode, X86FoldableSchedWrite sched,
                                          X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr#_.Suffix, "$src2, {sae}, $src1",
                      "$src1, {sae}, $src2",
                      (OpNode (_.VT _.RC:$src1),
                              (i32 timm:$src2))>,
                      EVEX_B, Sched<[sched]>;
}

multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
                  AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
                  SDNode MaskOpNode, SDNode OpNodeSAE, X86SchedWriteWidths sched,
                  Predicate prd>{
  let Predicates = [prd] in {
    defm Z    : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
                                           sched.ZMM, _.info512>,
                avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE,
                                               sched.ZMM, _.info512>, EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
                                           sched.XMM, _.info128>, EVEX_V128;
    defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
                                           sched.YMM, _.info256>, EVEX_V256;
  }
}

// Handle instruction  reg_vec1 = op(reg_vec2, reg_vec3, imm)
//                                op(reg_vec2, mem_vec, imm)
//                                op(reg_vec2, broadcast(eltVt), imm)
// All instructions are created with FROUND_CURRENT.
multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
    defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 timm:$src3))>,
                      Sched<[sched]>;
    defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
                    OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT (bitconvert (_.LdFrag addr:$src2))),
                            (i32 timm:$src3))>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
    defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
                    OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
                    "$src1, ${src2}"#_.BroadcastStr#", $src3",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT (_.BroadcastLdFrag addr:$src2)),
                            (i32 timm:$src3))>, EVEX_B,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Handle instruction  reg_vec1 = op(reg_vec2, reg_vec3, imm)
//                                op(reg_vec2, mem_vec, imm)
multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo DestInfo,
                              X86VectorVTInfo SrcInfo>{
  let ExeDomain = DestInfo.ExeDomain in {
    defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
                  (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
                                       (SrcInfo.VT SrcInfo.RC:$src2),
                                       (i8 timm:$src3)))>,
                  Sched<[sched]>;
    defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
                  (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
                                       (SrcInfo.VT (bitconvert
                                                      (SrcInfo.LdFrag addr:$src2))),
                                       (i8 timm:$src3)))>,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Handle instruction  reg_vec1 = op(reg_vec2, reg_vec3, imm)
//                                op(reg_vec2, mem_vec, imm)
//                                op(reg_vec2, broadcast(eltVt), imm)
multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, X86VectorVTInfo _>:
  avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, sched, _, _>{

  let ExeDomain = _.ExeDomain in
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
                  "$src1, ${src2}"#_.BroadcastStr#", $src3",
                  (OpNode (_.VT _.RC:$src1),
                          (_.VT (_.BroadcastLdFrag addr:$src2)),
                          (i8 timm:$src3))>, EVEX_B,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// Handle scalar instruction  reg_vec1 = op(reg_vec2, reg_vec3, imm)
//                                       op(reg_vec2, mem_scalar, imm)
multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
    defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 timm:$src3))>,
                      Sched<[sched]>;
    defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
                    OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                    (OpNode (_.VT _.RC:$src1),
                            (_.ScalarIntMemFrags addr:$src2),
                            (i32 timm:$src3))>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Handle instruction  reg_vec1 = op(reg_vec2, reg_vec3, imm), {sae}
multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode, X86FoldableSchedWrite sched,
                                    X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, {sae}, $src2, $src1",
                      "$src1, $src2, {sae}, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 timm:$src3))>,
                      EVEX_B, Sched<[sched]>;
}

// Handle scalar instruction  reg_vec1 = op(reg_vec2, reg_vec3, imm), {sae}
multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                    X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
  defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, {sae}, $src2, $src1",
                      "$src1, $src2, {sae}, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 timm:$src3))>,
                      EVEX_B, Sched<[sched]>;
}

multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
                  AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
                  SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{
  let Predicates = [prd] in {
    defm Z    : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
                avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE, sched.ZMM, _.info512>,
                EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
                EVEX_V128;
    defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
                EVEX_V256;
  }
}

multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
                   X86SchedWriteWidths sched, AVX512VLVectorVTInfo DestInfo,
                   AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> {
  let Predicates = [Pred] in {
    defm Z : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.ZMM, DestInfo.info512,
                                SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX_4V;
  }
  let Predicates = [Pred, HasVLX] in {
    defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.XMM, DestInfo.info128,
                                   SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX_4V;
    defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.YMM,
DestInfo.info256, 10312 SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX_4V; 10313 } 10314} 10315 10316multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _, 10317 bits<8> opc, SDNode OpNode, X86SchedWriteWidths sched, 10318 Predicate Pred = HasAVX512> { 10319 let Predicates = [Pred] in { 10320 defm Z : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, 10321 EVEX_V512; 10322 } 10323 let Predicates = [Pred, HasVLX] in { 10324 defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, 10325 EVEX_V128; 10326 defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, 10327 EVEX_V256; 10328 } 10329} 10330 10331multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr, 10332 X86VectorVTInfo _, bits<8> opc, SDNode OpNode, 10333 SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd> { 10334 let Predicates = [prd] in { 10335 defm Z : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, sched.XMM, _>, 10336 avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeSAE, sched.XMM, _>; 10337 } 10338} 10339 10340multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr, 10341 bits<8> opcPs, bits<8> opcPd, SDNode OpNode, 10342 SDNode MaskOpNode, SDNode OpNodeSAE, 10343 X86SchedWriteWidths sched, Predicate prd>{ 10344 defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info, 10345 opcPs, OpNode, MaskOpNode, OpNodeSAE, sched, prd>, 10346 EVEX_CD8<32, CD8VF>; 10347 defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info, 10348 opcPd, OpNode, MaskOpNode, OpNodeSAE, sched, prd>, 10349 EVEX_CD8<64, CD8VF>, VEX_W; 10350} 10351 10352defm VREDUCE : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56, 10353 X86VReduce, X86VReduce, X86VReduceSAE, 10354 SchedWriteFRnd, HasDQI>, AVX512AIi8Base, EVEX; 10355defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09, 10356 X86any_VRndScale, X86VRndScale, X86VRndScaleSAE, 10357 SchedWriteFRnd, HasAVX512>, 10358 AVX512AIi8Base, EVEX; 10359defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26, 10360 X86VGetMant, X86VGetMant, X86VGetMantSAE, 10361 SchedWriteFRnd, HasAVX512>, AVX512AIi8Base, EVEX; 10362 10363defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info, 10364 0x50, X86VRange, X86VRangeSAE, 10365 SchedWriteFAdd, HasDQI>, 10366 AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; 10367defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info, 10368 0x50, X86VRange, X86VRangeSAE, 10369 SchedWriteFAdd, HasDQI>, 10370 AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; 10371 10372defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd", 10373 f64x_info, 0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>, 10374 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W; 10375defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info, 10376 0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>, 10377 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>; 10378 10379defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info, 10380 0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>, 10381 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W; 10382defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info, 10383 0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>, 10384 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>; 10385 10386defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", 
f64x_info, 10387 0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>, 10388 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W; 10389defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info, 10390 0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>, 10391 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>; 10392 10393multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr, 10394 X86FoldableSchedWrite sched, 10395 X86VectorVTInfo _, 10396 X86VectorVTInfo CastInfo, 10397 string EVEX2VEXOvrd> { 10398 let ExeDomain = _.ExeDomain in { 10399 defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 10400 (ins _.RC:$src1, _.RC:$src2, u8imm:$src3), 10401 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", 10402 (_.VT (bitconvert 10403 (CastInfo.VT (X86Shuf128 _.RC:$src1, _.RC:$src2, 10404 (i8 timm:$src3)))))>, 10405 Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr">; 10406 defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 10407 (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3), 10408 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", 10409 (_.VT 10410 (bitconvert 10411 (CastInfo.VT (X86Shuf128 _.RC:$src1, 10412 (CastInfo.LdFrag addr:$src2), 10413 (i8 timm:$src3)))))>, 10414 Sched<[sched.Folded, sched.ReadAfterFold]>, 10415 EVEX2VEXOverride<EVEX2VEXOvrd#"rm">; 10416 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 10417 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3), 10418 OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1", 10419 "$src1, ${src2}"#_.BroadcastStr#", $src3", 10420 (_.VT 10421 (bitconvert 10422 (CastInfo.VT 10423 (X86Shuf128 _.RC:$src1, 10424 (_.BroadcastLdFrag addr:$src2), 10425 (i8 timm:$src3)))))>, EVEX_B, 10426 Sched<[sched.Folded, sched.ReadAfterFold]>; 10427 } 10428} 10429 10430multiclass avx512_shuff_packed_128<string OpcodeStr, X86FoldableSchedWrite sched, 10431 AVX512VLVectorVTInfo _, 10432 AVX512VLVectorVTInfo CastInfo, bits<8> opc, 10433 string EVEX2VEXOvrd>{ 10434 let Predicates = [HasAVX512] in 10435 defm Z : avx512_shuff_packed_128_common<opc, OpcodeStr, sched, 10436 _.info512, CastInfo.info512, "">, EVEX_V512; 10437 10438 let Predicates = [HasAVX512, HasVLX] in 10439 defm Z256 : avx512_shuff_packed_128_common<opc, OpcodeStr, sched, 10440 _.info256, CastInfo.info256, 10441 EVEX2VEXOvrd>, EVEX_V256; 10442} 10443 10444defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", WriteFShuffle256, 10445 avx512vl_f32_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; 10446defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", WriteFShuffle256, 10447 avx512vl_f64_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; 10448defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", WriteFShuffle256, 10449 avx512vl_i32_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; 10450defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", WriteFShuffle256, 10451 avx512vl_i64_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; 10452 10453multiclass avx512_valign<bits<8> opc, string OpcodeStr, 10454 X86FoldableSchedWrite sched, X86VectorVTInfo _>{ 10455 // NOTE: EVEX2VEXOverride changed back to Unset for 256-bit at the 10456 // instantiation of this class. 
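  // (Presumably because VEX vpalignr shifts within each 128-bit lane while
  // valignd/q rotate across the whole vector, the VPALIGNRrri override
  // below is only sound at 128 bits; avx512_valign_common resets it with
  // "let EVEX2VEXOverride = ?" when instantiating the 256-bit version.)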

multiclass avx512_valign<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  // NOTE: EVEX2VEXOverride changed back to Unset for 256-bit at the
  // instantiation of this class.
  let ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (_.VT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 timm:$src3)))>,
                  Sched<[sched]>, EVEX2VEXOverride<"VPALIGNRrri">;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                (_.VT (X86VAlign _.RC:$src1,
                                 (bitconvert (_.LdFrag addr:$src2)),
                                 (i8 timm:$src3)))>,
                Sched<[sched.Folded, sched.ReadAfterFold]>,
                EVEX2VEXOverride<"VPALIGNRrmi">;

  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                   OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
                   "$src1, ${src2}"#_.BroadcastStr#", $src3",
                   (X86VAlign _.RC:$src1,
                              (_.VT (_.BroadcastLdFrag addr:$src2)),
                              (i8 timm:$src3))>, EVEX_B,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_valign_common<string OpcodeStr, X86SchedWriteWidths sched,
                                AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z    : avx512_valign<0x03, OpcodeStr, sched.ZMM, _.info512>,
                              AVX512AIi8Base, EVEX_4V, EVEX_V512;
  }
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_valign<0x03, OpcodeStr, sched.XMM, _.info128>,
                              AVX512AIi8Base, EVEX_4V, EVEX_V128;
    // We can't really override the 256-bit version so change it back to unset.
    let EVEX2VEXOverride = ? in
    defm Z256 : avx512_valign<0x03, OpcodeStr, sched.YMM, _.info256>,
                              AVX512AIi8Base, EVEX_4V, EVEX_V256;
  }
}

defm VALIGND: avx512_valign_common<"valignd", SchedWriteShuffle,
                                   avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VALIGNQ: avx512_valign_common<"valignq", SchedWriteShuffle,
                                   avx512vl_i64_info>, EVEX_CD8<64, CD8VF>,
                                   VEX_W;

defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr",
                                         SchedWriteShuffle, avx512vl_i8_info,
                                         avx512vl_i8_info>, EVEX_CD8<8, CD8VF>;

// Fragments to help convert valignq into masked valignd. Or valignq/valignd
// into vpalignr.
def ValignqImm32XForm : SDNodeXForm<timm, [{
  return getI8Imm(N->getZExtValue() * 2, SDLoc(N));
}]>;
def ValignqImm8XForm : SDNodeXForm<timm, [{
  return getI8Imm(N->getZExtValue() * 8, SDLoc(N));
}]>;
def ValigndImm8XForm : SDNodeXForm<timm, [{
  return getI8Imm(N->getZExtValue() * 4, SDLoc(N));
}]>;
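// Worked example: rotating by one element with valignq moves 8 bytes, which
// equals valignd with an immediate of 2 (two i32 elements) or, for the
// 128-bit forms, vpalignr with an immediate of 8 (eight bytes); hence the
// *2, *8 and *4 scale factors above.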

multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
                                        X86VectorVTInfo From, X86VectorVTInfo To,
                                        SDNodeXForm ImmXForm> {
  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
                                 (bitconvert
                                  (From.VT (OpNode From.RC:$src1, From.RC:$src2,
                                                   timm:$src3))),
                                 To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask,
                                                  To.RC:$src1, To.RC:$src2,
                                                  (ImmXForm timm:$src3))>;

  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
                                 (bitconvert
                                  (From.VT (OpNode From.RC:$src1, From.RC:$src2,
                                                   timm:$src3))),
                                 To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask,
                                                   To.RC:$src1, To.RC:$src2,
                                                   (ImmXForm timm:$src3))>;

  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
                                 (bitconvert
                                  (From.VT (OpNode From.RC:$src1,
                                                   (From.LdFrag addr:$src2),
                                                   timm:$src3))),
                                 To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask,
                                                  To.RC:$src1, addr:$src2,
                                                  (ImmXForm timm:$src3))>;

  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
                                 (bitconvert
                                  (From.VT (OpNode From.RC:$src1,
                                                   (From.LdFrag addr:$src2),
                                                   timm:$src3))),
                                 To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask,
                                                   To.RC:$src1, addr:$src2,
                                                   (ImmXForm timm:$src3))>;
}

multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
                                           X86VectorVTInfo From,
                                           X86VectorVTInfo To,
                                           SDNodeXForm ImmXForm> :
      avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> {
  def : Pat<(From.VT (OpNode From.RC:$src1,
                      (bitconvert (To.VT (To.BroadcastLdFrag addr:$src2))),
                      timm:$src3)),
            (!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2,
                                                  (ImmXForm timm:$src3))>;

  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
                                 (bitconvert
                                  (From.VT (OpNode From.RC:$src1,
                                            (bitconvert
                                             (To.VT (To.BroadcastLdFrag addr:$src2))),
                                            timm:$src3))),
                                 To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask,
                                                   To.RC:$src1, addr:$src2,
                                                   (ImmXForm timm:$src3))>;

  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
                                 (bitconvert
                                  (From.VT (OpNode From.RC:$src1,
                                            (bitconvert
                                             (To.VT (To.BroadcastLdFrag addr:$src2))),
                                            timm:$src3))),
                                 To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask,
                                                    To.RC:$src1, addr:$src2,
                                                    (ImmXForm timm:$src3))>;
}

let Predicates = [HasAVX512] in {
  // For 512-bit we lower to the widest element type we can. So we only need
  // to handle converting valignq to valignd.
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ", X86VAlign, v8i64_info,
                                         v16i32_info, ValignqImm32XForm>;
}

let Predicates = [HasVLX] in {
  // For 128-bit we lower to the widest element type we can. So we only need
  // to handle converting valignq to valignd.
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ128", X86VAlign, v2i64x_info,
                                         v4i32x_info, ValignqImm32XForm>;
  // For 256-bit we lower to the widest element type we can. So we only need
  // to handle converting valignq to valignd.
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ256", X86VAlign, v4i64x_info,
                                         v8i32x_info, ValignqImm32XForm>;
}

let Predicates = [HasVLX, HasBWI] in {
  // We can turn 128- and 256-bit VALIGND/VALIGNQ into VPALIGNR.
  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v2i64x_info,
                                      v16i8x_info, ValignqImm8XForm>;
  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v4i32x_info,
                                      v16i8x_info, ValigndImm8XForm>;
}

defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw",
                SchedWritePSADBW, avx512vl_i16_info, avx512vl_i8_info>,
                EVEX_CD8<8, CD8VF>, NotEVEX2VEXConvertible;

multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins _.RC:$src1), OpcodeStr,
                            "$src1", "$src1",
                            (_.VT (OpNode (_.VT _.RC:$src1)))>, EVEX, AVX5128IBase,
                            Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                            (ins _.MemOp:$src1), OpcodeStr,
                            "$src1", "$src1",
                            (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1)))))>,
                            EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>,
                            Sched<[sched.Folded]>;
  }
}

multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _> :
           avx512_unary_rm<opc, OpcodeStr, OpNode, sched, _> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                             (ins _.ScalarMemOp:$src1), OpcodeStr,
                             "${src1}"#_.BroadcastStr,
                             "${src1}"#_.BroadcastStr,
                             (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src1))))>,
                             EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
                             Sched<[sched.Folded]>;
}

multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86SchedWriteWidths sched,
                              AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
           EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
                EVEX_V128;
  }
}

multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo,
                               Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
           EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
                EVEX_V128;
  }
}

multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd> {
  defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, sched,
                               avx512vl_i64_info, prd>, VEX_W;
  defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, sched,
                               avx512vl_i32_info, prd>;
}

multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd> {
  defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, sched,
                              avx512vl_i16_info, prd>, VEX_WIG;
  defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, sched,
                              avx512vl_i8_info, prd>, VEX_WIG;
}

multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
                                  bits<8> opc_d, bits<8> opc_q,
                                  string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched> {
  defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, sched,
                                    HasAVX512>,
              avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, sched,
                                    HasBWI>;
}

defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs,
                                    SchedWriteVecALU>;

// VPABS: Use the 512-bit version to implement 128/256-bit in the NoVLX case.
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v4i64 (abs VR256X:$src)),
            (EXTRACT_SUBREG
             (VPABSQZrr
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
             sub_ymm)>;
  def : Pat<(v2i64 (abs VR128X:$src)),
            (EXTRACT_SUBREG
             (VPABSQZrr
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
             sub_xmm)>;
}

// Use the 512-bit version to implement 128/256-bit operations.
multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
                                 AVX512VLVectorVTInfo _, Predicate prd> {
  let Predicates = [prd, NoVLX] in {
    def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1))),
              (EXTRACT_SUBREG
               (!cast<Instruction>(InstrStr # "Zrr")
                (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
                               _.info256.RC:$src1,
                               _.info256.SubRegIdx)),
               _.info256.SubRegIdx)>;

    def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1))),
              (EXTRACT_SUBREG
               (!cast<Instruction>(InstrStr # "Zrr")
                (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
                               _.info128.RC:$src1,
                               _.info128.SubRegIdx)),
               _.info128.SubRegIdx)>;
  }
}
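// Illustrative expansion of the widening trick: with the base feature but no
// VLX, (v4i32 (ctlz x)) is selected by inserting x into the low 128 bits of
// an undef zmm (INSERT_SUBREG), running the Z-suffixed 512-bit instruction
// (e.g. VPLZCNTDZrr) on all of it, and extracting the low 128 bits again.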

defm VPLZCNT    : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz,
                                        SchedWriteVecIMul, HasCDI>;

// FIXME: Is there a better scheduler class for VPCONFLICT?
defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict,
                                        SchedWriteVecALU, HasCDI>;

// VPLZCNT: Use the 512-bit version to implement 128/256-bit in the NoVLX case.
defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>;
defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>;

//===---------------------------------------------------------------------===//
// Counts number of ones - VPOPCNTD and VPOPCNTQ
//===---------------------------------------------------------------------===//

// FIXME: Is there a better scheduler class for VPOPCNTD/VPOPCNTQ?
defm VPOPCNT : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop,
                                     SchedWriteVecALU, HasVPOPCNTDQ>;

defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info, HasVPOPCNTDQ>;
defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info, HasVPOPCNTDQ>;

//===---------------------------------------------------------------------===//
// Replicate Single FP - MOVSHDUP and MOVSLDUP
//===---------------------------------------------------------------------===//

multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86SchedWriteWidths sched> {
  defm NAME: avx512_unary_rm_vl<opc, OpcodeStr, OpNode, sched,
                                avx512vl_f32_info, HasAVX512>, XS;
}

defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup,
                                  SchedWriteFShuffle>;
defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup,
                                  SchedWriteFShuffle>;

//===----------------------------------------------------------------------===//
// AVX-512 - MOVDDUP
//===----------------------------------------------------------------------===//

multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr,
                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins _.RC:$src), OpcodeStr, "$src", "$src",
                   (_.VT (X86VBroadcast (_.VT _.RC:$src)))>, EVEX,
                   Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                 (ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
                 (_.VT (_.BroadcastLdFrag addr:$src))>,
                 EVEX, EVEX_CD8<_.EltSize, CD8VH>,
                 Sched<[sched.Folded]>;
  }
}

multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86SchedWriteWidths sched,
                                 AVX512VLVectorVTInfo VTInfo> {
  defm Z : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.ZMM,
                           VTInfo.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.YMM,
                                VTInfo.info256>, EVEX_V256;
    defm Z128 : avx512_movddup_128<opc, OpcodeStr, sched.XMM,
                                   VTInfo.info128>, EVEX_V128;
  }
}

multiclass avx512_movddup<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          X86SchedWriteWidths sched> {
  defm NAME: avx512_movddup_common<opc, OpcodeStr, OpNode, sched,
                                   avx512vl_f64_info>, XD, VEX_W;
}

defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup, SchedWriteFShuffle>;

let Predicates = [HasVLX] in {
def : Pat<(v2f64 (X86VBroadcast f64:$src)),
          (VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;

def : Pat<(vselect_mask (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
                        (v2f64 VR128X:$src0)),
          (VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
                           (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
def : Pat<(vselect_mask (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
                        immAllZerosV),
          (VMOVDDUPZ128rrkz VK2WM:$mask,
                            (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - Unpack Instructions
//===----------------------------------------------------------------------===//

let Uses = []<Register>, mayRaiseFPException = 0 in {
defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, X86Unpckh, HasAVX512,
                                 SchedWriteFShuffleSizes, 0, 1>;
defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, X86Unpckl, HasAVX512,
                                 SchedWriteFShuffleSizes>;
}

defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
                                       SchedWriteShuffle, HasBWI>;
defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh,
                                       SchedWriteShuffle, HasBWI>;
defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl,
                                       SchedWriteShuffle, HasBWI>;
defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh,
                                       SchedWriteShuffle, HasBWI>;

defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl,
                                       SchedWriteShuffle, HasAVX512>;
defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh,
                                       SchedWriteShuffle, HasAVX512>;
defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl,
                                        SchedWriteShuffle, HasAVX512>;
defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh,
                                        SchedWriteShuffle, HasAVX512>;

//===----------------------------------------------------------------------===//
// AVX-512 - Extract & Insert Integer Instructions
//===----------------------------------------------------------------------===//

multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   X86VectorVTInfo _> {
  def mr : AVX512Ii8<opc, MRMDestMem, (outs),
                     (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
                     OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     [(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), timm:$src2))),
                             addr:$dst)]>,
                     EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecExtractSt]>;
}

multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst),
                       (ins _.RC:$src1, u8imm:$src2),
                       OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set GR32orGR64:$dst,
                             (X86pextrb (_.VT _.RC:$src1), timm:$src2))]>,
                       EVEX, TAPD, Sched<[WriteVecExtract]>;

    defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD;
  }
}

multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
                       (ins _.RC:$src1, u8imm:$src2),
                       OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set GR32orGR64:$dst,
                             (X86pextrw (_.VT _.RC:$src1), timm:$src2))]>,
                       EVEX, PD, Sched<[WriteVecExtract]>;

    let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
    def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
                           (ins _.RC:$src1, u8imm:$src2),
                           OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
                           EVEX, TAPD, FoldGenData<NAME#rr>,
                           Sched<[WriteVecExtract]>;

    defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD;
  }
}

multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
                                 RegisterClass GRC> {
  let Predicates = [HasDQI] in {
    def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst),
                       (ins _.RC:$src1, u8imm:$src2),
                       OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set GRC:$dst,
                             (extractelt (_.VT _.RC:$src1), imm:$src2))]>,
                       EVEX, TAPD, Sched<[WriteVecExtract]>;

    def mr : AVX512Ii8<0x16, MRMDestMem, (outs),
                       (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
                       OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(store (extractelt (_.VT _.RC:$src1),
                                           imm:$src2),addr:$dst)]>,
                       EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD,
                       Sched<[WriteVecExtractSt]>;
  }
}

defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>, VEX_WIG;
defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>, VEX_WIG;
defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, VEX_W;

multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86VectorVTInfo _, PatFrag LdFrag,
                               SDPatternOperator immoperator> {
  def rm : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst),
                     (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                     OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                     [(set _.RC:$dst,
                           (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), immoperator:$src3)))]>,
                     EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
                     Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
}

multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86VectorVTInfo _, PatFrag LdFrag> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
                       (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
                       OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                       [(set _.RC:$dst,
                             (OpNode _.RC:$src1, GR32orGR64:$src2, timm:$src3))]>, EVEX_4V,
                       Sched<[WriteVecInsert]>;

    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag, timm>;
  }
}

multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
                                X86VectorVTInfo _, RegisterClass GRC> {
  let Predicates = [HasDQI] in {
    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
                       (ins _.RC:$src1, GRC:$src2, u8imm:$src3),
                       OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                       [(set _.RC:$dst,
                             (_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>,
                       EVEX_4V, TAPD, Sched<[WriteVecInsert]>;

    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
                                    _.ScalarLdFrag, imm>, TAPD;
  }
}

defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info,
                                     extloadi8>, TAPD, VEX_WIG;
defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info,
                                     extloadi16>, PD, VEX_WIG;
defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, VEX_W;

//===----------------------------------------------------------------------===//
// VSHUFPS - VSHUFPD Operations
//===----------------------------------------------------------------------===//

multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_I,
                        AVX512VLVectorVTInfo VTInfo_FP>{
  defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp,
                                    SchedWriteFShuffle>,
             EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
             AVX512AIi8Base, EVEX_4V;
}

defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_i32_info, avx512vl_f32_info>, PS;
defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_i64_info, avx512vl_f64_info>, PD, VEX_W;

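// Illustrative note: vshufps keeps the SSE semantics per 128-bit lane. Imm
// bits [1:0] and [3:2] pick two elements of $src1 for the low half of each
// lane, and bits [5:4] and [7:6] pick two elements of $src2 for the high
// half; vshufpd instead uses one selector bit per element.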
//===----------------------------------------------------------------------===//
// AVX-512 - Byte shift Left/Right
//===----------------------------------------------------------------------===//

multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
                               Format MRMm, string OpcodeStr,
                               X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  def ri : AVX512<opc, MRMr,
             (outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 timm:$src2))))]>,
             Sched<[sched]>;
  def mi : AVX512<opc, MRMm,
           (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
           [(set _.RC:$dst,(_.VT (OpNode
                                  (_.VT (bitconvert (_.LdFrag addr:$src1))),
                                  (i8 timm:$src2))))]>,
           Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
                                   Format MRMm, string OpcodeStr,
                                   X86SchedWriteWidths sched, Predicate prd>{
  let Predicates = [prd] in
  defm Z : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                               sched.ZMM, v64i8_info>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                    sched.YMM, v32i8x_info>, EVEX_V256;
    defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                    sched.XMM, v16i8x_info>, EVEX_V128;
  }
}
defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq",
                                       SchedWriteShuffle, HasBWI>,
                                       AVX512PDIi8Base, EVEX_4V, VEX_WIG;
defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq",
                                       SchedWriteShuffle, HasBWI>,
                                       AVX512PDIi8Base, EVEX_4V, VEX_WIG;

multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
                                string OpcodeStr, X86FoldableSchedWrite sched,
                                X86VectorVTInfo _dst, X86VectorVTInfo _src> {
  let isCommutable = 1 in
  def rr : AVX512BI<opc, MRMSrcReg,
             (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.RC:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _dst.RC:$dst,(_dst.VT
                                 (OpNode (_src.VT _src.RC:$src1),
                                         (_src.VT _src.RC:$src2))))]>,
             Sched<[sched]>;
  def rm : AVX512BI<opc, MRMSrcMem,
             (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.MemOp:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _dst.RC:$dst,(_dst.VT
                                 (OpNode (_src.VT _src.RC:$src1),
                                         (_src.VT (bitconvert
                                                   (_src.LdFrag addr:$src2))))))]>,
             Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
                                    string OpcodeStr, X86SchedWriteWidths sched,
                                    Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.ZMM,
                                v8i64_info, v64i8_info>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.YMM,
                                     v4i64x_info, v32i8x_info>, EVEX_V256;
    defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.XMM,
                                     v2i64x_info, v16i8x_info>, EVEX_V128;
  }
}

defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
                                        SchedWritePSADBW, HasBWI>, EVEX_4V, VEX_WIG;
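// Note: the destination and source VTs differ above because vpsadbw sums the
// absolute byte differences within each 8-byte group and writes the sum into
// the low 16 bits of the corresponding 64-bit element, so a v64i8 source
// pair produces a v8i64 result.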

// Transforms to swizzle an immediate to enable better matching when the
// memory operand isn't in the right place.
def VPTERNLOG321_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by swapping operand 0 and operand 2.
  uint8_t Imm = N->getZExtValue();
  // Swap bits 1/4 and 3/6.
  uint8_t NewImm = Imm & 0xa5;
  if (Imm & 0x02) NewImm |= 0x10;
  if (Imm & 0x10) NewImm |= 0x02;
  if (Imm & 0x08) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG213_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by swapping operand 0 and operand 1.
  uint8_t Imm = N->getZExtValue();
  // Swap bits 2/4 and 3/5.
  uint8_t NewImm = Imm & 0xc3;
  if (Imm & 0x04) NewImm |= 0x10;
  if (Imm & 0x10) NewImm |= 0x04;
  if (Imm & 0x08) NewImm |= 0x20;
  if (Imm & 0x20) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG132_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2.
  uint8_t Imm = N->getZExtValue();
  // Swap bits 1/2 and 5/6.
  uint8_t NewImm = Imm & 0x99;
  if (Imm & 0x02) NewImm |= 0x04;
  if (Imm & 0x04) NewImm |= 0x02;
  if (Imm & 0x20) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x20;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG231_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by moving operand 1 to the end.
  uint8_t Imm = N->getZExtValue();
  // Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3
  uint8_t NewImm = Imm & 0x81;
  if (Imm & 0x02) NewImm |= 0x10;
  if (Imm & 0x04) NewImm |= 0x02;
  if (Imm & 0x08) NewImm |= 0x20;
  if (Imm & 0x10) NewImm |= 0x04;
  if (Imm & 0x20) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG312_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by moving operand 2 to the beginning.
  uint8_t Imm = N->getZExtValue();
  // Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5
  uint8_t NewImm = Imm & 0x81;
  if (Imm & 0x02) NewImm |= 0x04;
  if (Imm & 0x04) NewImm |= 0x10;
  if (Imm & 0x08) NewImm |= 0x40;
  if (Imm & 0x10) NewImm |= 0x02;
  if (Imm & 0x20) NewImm |= 0x08;
  if (Imm & 0x40) NewImm |= 0x20;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
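// Worked example for the transforms above: imm 0xCA computes
// op0 ? op1 : op2. Feeding 0xCA through VPTERNLOG321_imm8 keeps bits 0/2/5/7
// and swaps bit pairs 1/4 and 3/6, giving 0xD8, which computes
// op2 ? op1 : op0, i.e. the same function with operands 0 and 2 exchanged.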

multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          X86FoldableSchedWrite sched, X86VectorVTInfo _,
                          string Name>{
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
                      OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_.VT _.RC:$src3),
                              (i8 timm:$src4)), 1, 1>,
                      AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
  defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
                      OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_.VT (bitconvert (_.LdFrag addr:$src3))),
                              (i8 timm:$src4)), 1, 0>,
                      AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                      Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
                      OpcodeStr, "$src4, ${src3}"#_.BroadcastStr#", $src2",
                      "$src2, ${src3}"#_.BroadcastStr#", $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_.VT (_.BroadcastLdFrag addr:$src3)),
                              (i8 timm:$src4)), 1, 0>, EVEX_B,
                      AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                      Sched<[sched.Folded, sched.ReadAfterFold]>;
  }// Constraints = "$src1 = $dst"

  // Additional patterns for matching passthru operand in other positions.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 timm:$src4))>;

  // Additional patterns for matching zero masking with loads in other
  // positions.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, (i8 timm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;

  // Additional patterns for matching masked loads with different
  // operand orders.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1,
                    (bitconvert (_.LdFrag addr:$src3)), (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src1, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;

  // Additional patterns for matching zero masking with broadcasts in other
  // positions.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
             _.KRCWM:$mask, _.RC:$src2, addr:$src3,
             (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src1,
                    (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src2, (i8 timm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
             _.KRCWM:$mask, _.RC:$src2, addr:$src3,
             (VPTERNLOG132_imm8 timm:$src4))>;

  // Additional patterns for matching masked broadcasts with different
  // operand orders.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src1, (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src2, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1,
                    (_.BroadcastLdFrag addr:$src3),
                    (i8 timm:$src4)), _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src2,
                    (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src1, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;
}

multiclass avx512_common_ternlog<string OpcodeStr, X86SchedWriteWidths sched,
                                 AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in
  defm Z    : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.ZMM,
                             _.info512, NAME>, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.XMM,
                               _.info128, NAME>, EVEX_V128;
    defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.YMM,
                               _.info256, NAME>, EVEX_V256;
  }
}

defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SchedWriteVecALU,
                                        avx512vl_i32_info>;
defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU,
                                        avx512vl_i64_info>, VEX_W;

// Patterns to implement vnot using vpternlog instead of creating all ones
// using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen
// so that the result is only dependent on src0. But we use the same source
// for all operands to prevent a false dependency.
// TODO: We should maybe have a more generalized algorithm for folding to
// vpternlog.
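// Why 15 works: the ternlog immediate is indexed by src0*4 + src1*2 + src2,
// and 15 (0x0f) sets exactly the four entries where the src0 bit is 0, so
// the result is the bitwise NOT of src0 regardless of the other two
// operands.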
let Predicates = [HasAVX512] in {
  def : Pat<(xor VR512:$src, (v64i8 immAllOnesV)),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
  def : Pat<(xor VR512:$src, (v32i16 immAllOnesV)),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
  def : Pat<(xor VR512:$src, (v16i32 immAllOnesV)),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
  def : Pat<(xor VR512:$src, (v8i64 immAllOnesV)),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
}

let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(xor VR128X:$src, (v16i8 immAllOnesV)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (i8 15)), sub_xmm)>;
  def : Pat<(xor VR128X:$src, (v8i16 immAllOnesV)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (i8 15)), sub_xmm)>;
  def : Pat<(xor VR128X:$src, (v4i32 immAllOnesV)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (i8 15)), sub_xmm)>;
  def : Pat<(xor VR128X:$src, (v2i64 immAllOnesV)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (i8 15)), sub_xmm)>;

  def : Pat<(xor VR256X:$src, (v32i8 immAllOnesV)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (i8 15)), sub_ymm)>;
  def : Pat<(xor VR256X:$src, (v16i16 immAllOnesV)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (i8 15)), sub_ymm)>;
  def : Pat<(xor VR256X:$src, (v8i32 immAllOnesV)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (i8 15)), sub_ymm)>;
  def : Pat<(xor VR256X:$src, (v4i64 immAllOnesV)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (i8 15)), sub_ymm)>;
}

let Predicates = [HasVLX] in {
  def : Pat<(xor VR128X:$src, (v16i8 immAllOnesV)),
            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
  def : Pat<(xor VR128X:$src, (v8i16 immAllOnesV)),
            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
  def : Pat<(xor VR128X:$src, (v4i32 immAllOnesV)),
            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
  def : Pat<(xor VR128X:$src, (v2i64 immAllOnesV)),
            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;

  def : Pat<(xor VR256X:$src, (v32i8 immAllOnesV)),
            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
  def : Pat<(xor VR256X:$src, (v16i16 immAllOnesV)),
            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
  def : Pat<(xor VR256X:$src, (v8i32 immAllOnesV)),
            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
  def : Pat<(xor VR256X:$src, (v4i64 immAllOnesV)),
            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - FixupImm
//===----------------------------------------------------------------------===//

multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                  X86VectorVTInfo TblVT>{
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
      Uses = [MXCSR], mayRaiseFPException = 1 in {
    defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                        OpcodeStr#_.Suffix, "$src4, $src3, $src2",
                        "$src2, $src3, $src4",
                        (X86VFixupimm (_.VT _.RC:$src1),
                                      (_.VT _.RC:$src2),
                                      (TblVT.VT _.RC:$src3),
                                      (i32 timm:$src4))>, Sched<[sched]>;
    defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                        (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
                        OpcodeStr#_.Suffix, "$src4, $src3, $src2",
                        "$src2, $src3, $src4",
                        (X86VFixupimm (_.VT _.RC:$src1),
                                      (_.VT _.RC:$src2),
                                      (TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))),
                                      (i32 timm:$src4))>,
                        Sched<[sched.Folded, sched.ReadAfterFold]>;
    defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                        (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
                        OpcodeStr#_.Suffix, "$src4, ${src3}"#_.BroadcastStr#", $src2",
                        "$src2, ${src3}"#_.BroadcastStr#", $src4",
                        (X86VFixupimm (_.VT _.RC:$src1),
                                      (_.VT _.RC:$src2),
                                      (TblVT.VT (TblVT.BroadcastLdFrag addr:$src3)),
                                      (i32 timm:$src4))>,
                        EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  } // Constraints = "$src1 = $dst"
}

multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
                                      X86FoldableSchedWrite sched,
                                      X86VectorVTInfo _, X86VectorVTInfo TblVT>
  : avx512_fixupimm_packed<opc, OpcodeStr, sched, _, TblVT> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
  defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                      OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2",
                      "$src2, $src3, {sae}, $src4",
                      (X86VFixupimmSAE (_.VT _.RC:$src1),
                                       (_.VT _.RC:$src2),
                                       (TblVT.VT _.RC:$src3),
                                       (i32 timm:$src4))>,
                      EVEX_B, Sched<[sched]>;
  }
}

multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                  X86VectorVTInfo _src3VT> {
  let Constraints = "$src1 = $dst" , Predicates = [HasAVX512],
      ExeDomain = _.ExeDomain in {
    defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                      OpcodeStr#_.Suffix, "$src4, $src3, $src2",
                      "$src2, $src3, $src4",
                      (X86VFixupimms (_.VT _.RC:$src1),
                                     (_.VT _.RC:$src2),
                                     (_src3VT.VT _src3VT.RC:$src3),
                                     (i32 timm:$src4))>, Sched<[sched]>, SIMD_EXC;
    let Uses = [MXCSR] in
    defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                      OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2",
                      "$src2, $src3, {sae}, $src4",
                      (X86VFixupimmSAEs (_.VT _.RC:$src1),
                                        (_.VT _.RC:$src2),
                                        (_src3VT.VT _src3VT.RC:$src3),
                                        (i32 timm:$src4))>,
                      EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
    defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
                      OpcodeStr#_.Suffix, "$src4, $src3, $src2",
                      "$src2, $src3, $src4",
                      (X86VFixupimms (_.VT _.RC:$src1),
                                     (_.VT _.RC:$src2),
                                     (_src3VT.VT (scalar_to_vector
                                                  (_src3VT.ScalarLdFrag addr:$src3))),
                                     (i32 timm:$src4))>,
                      Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  }
}

multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched,
                                      AVX512VLVectorVTInfo _Vec,
                                      AVX512VLVectorVTInfo _Tbl> {
  let Predicates = [HasAVX512] in
  defm Z    : avx512_fixupimm_packed_sae<0x54, "vfixupimm", sched.ZMM,
                                _Vec.info512, _Tbl.info512>, AVX512AIi8Base,
                                EVEX_4V, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.XMM,
                                _Vec.info128, _Tbl.info128>, AVX512AIi8Base,
                                EVEX_4V, EVEX_V128;
    defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.YMM,
                                _Vec.info256, _Tbl.info256>, AVX512AIi8Base,
                                EVEX_4V, EVEX_V256;
  }
}

defm VFIXUPIMMSSZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
                          SchedWriteFAdd.Scl, f32x_info, v4i32x_info>,
                          AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
defm VFIXUPIMMSDZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
                          SchedWriteFAdd.Scl, f64x_info, v2i64x_info>,
                          AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>,
                          VEX_W;
defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f32_info,
                         avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f64_info,
                         avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;

// Patterns used to select SSE scalar fp arithmetic instructions from
// either:
//
// (1) a scalar fp operation followed by a blend
//
// The effect is that the backend no longer emits unnecessary vector insert
// instructions immediately after SSE scalar fp instructions
// like addss or mulss.
//
// For example, given the following code:
//   __m128 foo(__m128 A, __m128 B) {
//     A[0] += B[0];
//     return A;
//   }
//
// Previously we generated:
//   addss %xmm0, %xmm1
//   movss %xmm1, %xmm0
//
// We now generate:
//   addss %xmm1, %xmm0
//
// (2) a vector packed single/double fp operation followed by a vector insert
//
// The effect is that the backend converts the packed fp instruction
// followed by a vector insert into a single SSE scalar fp instruction.
11550// 11551// For example, given the following code: 11552// __m128 foo(__m128 A, __m128 B) { 11553// __m128 C = A + B; 11554// return (__m128) {c[0], a[1], a[2], a[3]}; 11555// } 11556// 11557// Previously we generated: 11558// addps %xmm0, %xmm1 11559// movss %xmm1, %xmm0 11560// 11561// We now generate: 11562// addss %xmm1, %xmm0 11563 11564// TODO: Some canonicalization in lowering would simplify the number of 11565// patterns we have to try to match. 11566multiclass AVX512_scalar_math_fp_patterns<SDNode Op, SDNode MaskedOp, 11567 string OpcPrefix, SDNode MoveNode, 11568 X86VectorVTInfo _, PatLeaf ZeroFP> { 11569 let Predicates = [HasAVX512] in { 11570 // extracted scalar math op with insert via movss 11571 def : Pat<(MoveNode 11572 (_.VT VR128X:$dst), 11573 (_.VT (scalar_to_vector 11574 (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))), 11575 _.FRC:$src)))), 11576 (!cast<Instruction>("V"#OpcPrefix#"Zrr_Int") _.VT:$dst, 11577 (_.VT (COPY_TO_REGCLASS _.FRC:$src, VR128X)))>; 11578 def : Pat<(MoveNode 11579 (_.VT VR128X:$dst), 11580 (_.VT (scalar_to_vector 11581 (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))), 11582 (_.ScalarLdFrag addr:$src))))), 11583 (!cast<Instruction>("V"#OpcPrefix#"Zrm_Int") _.VT:$dst, addr:$src)>; 11584 11585 // extracted masked scalar math op with insert via movss 11586 def : Pat<(MoveNode (_.VT VR128X:$src1), 11587 (scalar_to_vector 11588 (X86selects_mask VK1WM:$mask, 11589 (MaskedOp (_.EltVT 11590 (extractelt (_.VT VR128X:$src1), (iPTR 0))), 11591 _.FRC:$src2), 11592 _.FRC:$src0))), 11593 (!cast<Instruction>("V"#OpcPrefix#"Zrr_Intk") 11594 (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)), 11595 VK1WM:$mask, _.VT:$src1, 11596 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>; 11597 def : Pat<(MoveNode (_.VT VR128X:$src1), 11598 (scalar_to_vector 11599 (X86selects_mask VK1WM:$mask, 11600 (MaskedOp (_.EltVT 11601 (extractelt (_.VT VR128X:$src1), (iPTR 0))), 11602 (_.ScalarLdFrag addr:$src2)), 11603 _.FRC:$src0))), 11604 (!cast<Instruction>("V"#OpcPrefix#"Zrm_Intk") 11605 (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)), 11606 VK1WM:$mask, _.VT:$src1, addr:$src2)>; 11607 11608 // extracted masked scalar math op with insert via movss 11609 def : Pat<(MoveNode (_.VT VR128X:$src1), 11610 (scalar_to_vector 11611 (X86selects_mask VK1WM:$mask, 11612 (MaskedOp (_.EltVT 11613 (extractelt (_.VT VR128X:$src1), (iPTR 0))), 11614 _.FRC:$src2), (_.EltVT ZeroFP)))), 11615 (!cast<I>("V"#OpcPrefix#"Zrr_Intkz") 11616 VK1WM:$mask, _.VT:$src1, 11617 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>; 11618 def : Pat<(MoveNode (_.VT VR128X:$src1), 11619 (scalar_to_vector 11620 (X86selects_mask VK1WM:$mask, 11621 (MaskedOp (_.EltVT 11622 (extractelt (_.VT VR128X:$src1), (iPTR 0))), 11623 (_.ScalarLdFrag addr:$src2)), (_.EltVT ZeroFP)))), 11624 (!cast<I>("V"#OpcPrefix#"Zrm_Intkz") VK1WM:$mask, _.VT:$src1, addr:$src2)>; 11625 } 11626} 11627 11628defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSS", X86Movss, v4f32x_info, fp32imm0>; 11629defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSS", X86Movss, v4f32x_info, fp32imm0>; 11630defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSS", X86Movss, v4f32x_info, fp32imm0>; 11631defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSS", X86Movss, v4f32x_info, fp32imm0>; 11632 11633defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSD", X86Movsd, v2f64x_info, fp64imm0>; 11634defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSD", X86Movsd, v2f64x_info, fp64imm0>; 11635defm : 

multiclass AVX512_scalar_unary_math_patterns<SDNode OpNode, string OpcPrefix,
                                             SDNode Move, X86VectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    def : Pat<(_.VT (Move _.VT:$dst,
                     (scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))),
              (!cast<Instruction>("V"#OpcPrefix#"Zr_Int") _.VT:$dst, _.VT:$src)>;
  }
}

defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSS", X86Movss, v4f32x_info>;
defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSD", X86Movsd, v2f64x_info>;

//===----------------------------------------------------------------------===//
// AES instructions
//===----------------------------------------------------------------------===//

multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> {
  let Predicates = [HasVLX, HasVAES] in {
    defm Z128 : AESI_binop_rm_int<Op, OpStr,
                                  !cast<Intrinsic>(IntPrefix),
                                  loadv2i64, 0, VR128X, i128mem>,
                EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V128, VEX_WIG;
    defm Z256 : AESI_binop_rm_int<Op, OpStr,
                                  !cast<Intrinsic>(IntPrefix#"_256"),
                                  loadv4i64, 0, VR256X, i256mem>,
                EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512, HasVAES] in
  defm Z : AESI_binop_rm_int<Op, OpStr,
                             !cast<Intrinsic>(IntPrefix#"_512"),
                             loadv8i64, 0, VR512, i512mem>,
           EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_WIG;
}

defm VAESENC     : avx512_vaes<0xDC, "vaesenc", "int_x86_aesni_aesenc">;
defm VAESENCLAST : avx512_vaes<0xDD, "vaesenclast", "int_x86_aesni_aesenclast">;
defm VAESDEC     : avx512_vaes<0xDE, "vaesdec", "int_x86_aesni_aesdec">;
defm VAESDECLAST : avx512_vaes<0xDF, "vaesdeclast", "int_x86_aesni_aesdeclast">;

//===----------------------------------------------------------------------===//
// PCLMUL instructions - Carry less multiplication
//===----------------------------------------------------------------------===//

let Predicates = [HasAVX512, HasVPCLMULQDQ] in
defm VPCLMULQDQZ : vpclmulqdq<VR512, i512mem, loadv8i64, int_x86_pclmulqdq_512>,
                   EVEX_4V, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_WIG;

let Predicates = [HasVLX, HasVPCLMULQDQ] in {
defm VPCLMULQDQZ128 : vpclmulqdq<VR128X, i128mem, loadv2i64, int_x86_pclmulqdq>,
                      EVEX_4V, EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_WIG;

defm VPCLMULQDQZ256: vpclmulqdq<VR256X, i256mem, loadv4i64,
                                int_x86_pclmulqdq_256>, EVEX_4V, EVEX_V256,
                                EVEX_CD8<64, CD8VF>, VEX_WIG;
}

// Aliases
defm : vpclmulqdq_aliases<"VPCLMULQDQZ", VR512, i512mem>;
defm : vpclmulqdq_aliases<"VPCLMULQDQZ128", VR128X, i128mem>;
defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>;

//===----------------------------------------------------------------------===//
// VBMI2
//===----------------------------------------------------------------------===//

multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
  let Constraints = "$src1 = $dst",
      ExeDomain = VTI.ExeDomain in {
    defm r: AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
                                 (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
                                 "$src3, $src2", "$src2, $src3",
                                 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
                                          VTI.RC:$src3))>,
            AVX512FMA3Base, Sched<[sched]>;
    defm m: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                                 (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
                                 "$src3, $src2", "$src2, $src3",
                                 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
                                          (VTI.VT (VTI.LdFrag addr:$src3))))>,
            AVX512FMA3Base,
            Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
"$src3, $src2", "$src2, $src3", 11710 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>, 11711 AVX512FMA3Base, Sched<[sched]>; 11712 defm m: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst), 11713 (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr, 11714 "$src3, $src2", "$src2, $src3", 11715 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, 11716 (VTI.VT (VTI.LdFrag addr:$src3))))>, 11717 AVX512FMA3Base, 11718 Sched<[sched.Folded, sched.ReadAfterFold]>; 11719 } 11720} 11721 11722multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode, 11723 X86FoldableSchedWrite sched, X86VectorVTInfo VTI> 11724 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched, VTI> { 11725 let Constraints = "$src1 = $dst", 11726 ExeDomain = VTI.ExeDomain in 11727 defm mb: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst), 11728 (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr, 11729 "${src3}"#VTI.BroadcastStr#", $src2", 11730 "$src2, ${src3}"#VTI.BroadcastStr, 11731 (OpNode VTI.RC:$src1, VTI.RC:$src2, 11732 (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>, 11733 AVX512FMA3Base, EVEX_B, 11734 Sched<[sched.Folded, sched.ReadAfterFold]>; 11735} 11736 11737multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode, 11738 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> { 11739 let Predicates = [HasVBMI2] in 11740 defm Z : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.ZMM, VTI.info512>, 11741 EVEX_V512; 11742 let Predicates = [HasVBMI2, HasVLX] in { 11743 defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.YMM, VTI.info256>, 11744 EVEX_V256; 11745 defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.XMM, VTI.info128>, 11746 EVEX_V128; 11747 } 11748} 11749 11750multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode, 11751 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> { 11752 let Predicates = [HasVBMI2] in 11753 defm Z : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.ZMM, VTI.info512>, 11754 EVEX_V512; 11755 let Predicates = [HasVBMI2, HasVLX] in { 11756 defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.YMM, VTI.info256>, 11757 EVEX_V256; 11758 defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.XMM, VTI.info128>, 11759 EVEX_V128; 11760 } 11761} 11762multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix, 11763 SDNode OpNode, X86SchedWriteWidths sched> { 11764 defm W : VBMI2_shift_var_rm_common<wOp, Prefix#"w", OpNode, sched, 11765 avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>; 11766 defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix#"d", OpNode, sched, 11767 avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; 11768 defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix#"q", OpNode, sched, 11769 avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>; 11770} 11771 11772multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix, 11773 SDNode OpNode, X86SchedWriteWidths sched> { 11774 defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix#"w", sched, 11775 avx512vl_i16_info, avx512vl_i16_info, HasVBMI2>, 11776 VEX_W, EVEX_CD8<16, CD8VF>; 11777 defm D : avx512_common_3Op_imm8<Prefix#"d", avx512vl_i32_info, dqOp, 11778 OpNode, sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; 11779 defm Q : avx512_common_3Op_imm8<Prefix#"q", avx512vl_i64_info, dqOp, OpNode, 11780 sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; 11781} 11782 11783// Concat & Shift 11784defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SchedWriteVecIMul>; 11785defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, 

// Compress
defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", WriteVarShuffle256,
                                         avx512vl_i8_info, HasVBMI2>, EVEX,
                                         NotMemoryFoldable;
defm VPCOMPRESSW : compress_by_elt_width<0x63, "vpcompressw", WriteVarShuffle256,
                                         avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W,
                                         NotMemoryFoldable;
// Expand
defm VPEXPANDB : expand_by_elt_width<0x62, "vpexpandb", WriteVarShuffle256,
                                     avx512vl_i8_info, HasVBMI2>, EVEX;
defm VPEXPANDW : expand_by_elt_width<0x62, "vpexpandw", WriteVarShuffle256,
                                     avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W;

//===----------------------------------------------------------------------===//
// VNNI
//===----------------------------------------------------------------------===//

let Constraints = "$src1 = $dst" in
multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
                    X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
                    bit IsCommutable> {
  let ExeDomain = VTI.ExeDomain in {
    defm r  : AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
                                   (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
                                   "$src3, $src2", "$src2, $src3",
                                   (VTI.VT (OpNode VTI.RC:$src1,
                                            VTI.RC:$src2, VTI.RC:$src3)),
                                   IsCommutable, IsCommutable>,
                                   EVEX_4V, T8PD, Sched<[sched]>;
    defm m  : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                                   (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
                                   "$src3, $src2", "$src2, $src3",
                                   (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
                                           (VTI.VT (VTI.LdFrag addr:$src3))))>,
                                   EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD,
                                   Sched<[sched.Folded, sched.ReadAfterFold]>;
    defm mb : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                                   (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
                                   OpStr, "${src3}"#VTI.BroadcastStr#", $src2",
                                   "$src2, ${src3}"#VTI.BroadcastStr,
                                   (OpNode VTI.RC:$src1, VTI.RC:$src2,
                                    (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
                                   EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B,
                                   T8PD, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode,
                       X86SchedWriteWidths sched, bit IsCommutable> {
  let Predicates = [HasVNNI] in
  defm Z : VNNI_rmb<Op, OpStr, OpNode, sched.ZMM, v16i32_info,
                    IsCommutable>, EVEX_V512;
  let Predicates = [HasVNNI, HasVLX] in {
    defm Z256 : VNNI_rmb<Op, OpStr, OpNode, sched.YMM, v8i32x_info,
                         IsCommutable>, EVEX_V256;
    defm Z128 : VNNI_rmb<Op, OpStr, OpNode, sched.XMM, v4i32x_info,
                         IsCommutable>, EVEX_V128;
  }
}

// FIXME: Is there a better scheduler class for VPDP?
defm VPDPBUSD  : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SchedWriteVecIMul, 0>;
defm VPDPBUSDS : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SchedWriteVecIMul, 0>;
defm VPDPWSSD  : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SchedWriteVecIMul, 1>;
defm VPDPWSSDS : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SchedWriteVecIMul, 1>;

// Patterns to match VPDPWSSD from existing instructions/intrinsics.
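// The X86vpmaddwd_su fragment requires the vpmaddwd node to have a single use,
// so folding the multiply-add into vpdpwssd never duplicates work.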
let Predicates = [HasVNNI] in {
  def : Pat<(v16i32 (add VR512:$src1,
                         (X86vpmaddwd_su VR512:$src2, VR512:$src3))),
            (VPDPWSSDZr VR512:$src1, VR512:$src2, VR512:$src3)>;
  def : Pat<(v16i32 (add VR512:$src1,
                         (X86vpmaddwd_su VR512:$src2, (load addr:$src3)))),
            (VPDPWSSDZm VR512:$src1, VR512:$src2, addr:$src3)>;
}
let Predicates = [HasVNNI, HasVLX] in {
  def : Pat<(v8i32 (add VR256X:$src1,
                        (X86vpmaddwd_su VR256X:$src2, VR256X:$src3))),
            (VPDPWSSDZ256r VR256X:$src1, VR256X:$src2, VR256X:$src3)>;
  def : Pat<(v8i32 (add VR256X:$src1,
                        (X86vpmaddwd_su VR256X:$src2, (load addr:$src3)))),
            (VPDPWSSDZ256m VR256X:$src1, VR256X:$src2, addr:$src3)>;
  def : Pat<(v4i32 (add VR128X:$src1,
                        (X86vpmaddwd_su VR128X:$src2, VR128X:$src3))),
            (VPDPWSSDZ128r VR128X:$src1, VR128X:$src2, VR128X:$src3)>;
  def : Pat<(v4i32 (add VR128X:$src1,
                        (X86vpmaddwd_su VR128X:$src2, (load addr:$src3)))),
            (VPDPWSSDZ128m VR128X:$src1, VR128X:$src2, addr:$src3)>;
}

//===----------------------------------------------------------------------===//
// Bit Algorithms
//===----------------------------------------------------------------------===//

// FIXME: Is there a better scheduler class for VPOPCNTB/VPOPCNTW?
defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, SchedWriteVecALU,
                                   avx512vl_i8_info, HasBITALG>;
defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SchedWriteVecALU,
                                   avx512vl_i16_info, HasBITALG>, VEX_W;

defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>;
defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>;

def X86Vpshufbitqmb_su : PatFrag<(ops node:$src1, node:$src2),
                                 (X86Vpshufbitqmb node:$src1, node:$src2), [{
  return N->hasOneUse();
}]>;

multiclass VPSHUFBITQMB_rm<X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
  defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst),
                                (ins VTI.RC:$src1, VTI.RC:$src2),
                                "vpshufbitqmb",
                                "$src2, $src1", "$src1, $src2",
                                (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
                                                 (VTI.VT VTI.RC:$src2)),
                                (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
                                                    (VTI.VT VTI.RC:$src2))>,
                                EVEX_4V, T8PD, Sched<[sched]>;
  defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst),
                                (ins VTI.RC:$src1, VTI.MemOp:$src2),
                                "vpshufbitqmb",
                                "$src2, $src1", "$src1, $src2",
                                (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
                                                 (VTI.VT (VTI.LdFrag addr:$src2))),
                                (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
                                                    (VTI.VT (VTI.LdFrag addr:$src2)))>,
                                EVEX_4V, EVEX_CD8<8, CD8VF>, T8PD,
                                Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass VPSHUFBITQMB_common<X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasBITALG] in
  defm Z : VPSHUFBITQMB_rm<sched.ZMM, VTI.info512>, EVEX_V512;
  let Predicates = [HasBITALG, HasVLX] in {
    defm Z256 : VPSHUFBITQMB_rm<sched.YMM, VTI.info256>, EVEX_V256;
    defm Z128 : VPSHUFBITQMB_rm<sched.XMM, VTI.info128>, EVEX_V128;
  }
}

// FIXME: Is there a better scheduler class for VPSHUFBITQMB?
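// Each byte of the second source selects a bit position (its low six bits)
// within the corresponding quadword of the first source; the gathered bits
// form the resulting mask register value.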
defm VPSHUFBITQMB : VPSHUFBITQMB_common<SchedWriteVecIMul, avx512vl_i8_info>;

//===----------------------------------------------------------------------===//
// GFNI
//===----------------------------------------------------------------------===//

multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
                                   X86SchedWriteWidths sched> {
  let Predicates = [HasGFNI, HasAVX512, HasBWI] in
  defm Z : avx512_binop_rm<Op, OpStr, OpNode, v64i8_info, sched.ZMM, 1>,
           EVEX_V512;
  let Predicates = [HasGFNI, HasVLX, HasBWI] in {
    defm Z256 : avx512_binop_rm<Op, OpStr, OpNode, v32i8x_info, sched.YMM, 1>,
                EVEX_V256;
    defm Z128 : avx512_binop_rm<Op, OpStr, OpNode, v16i8x_info, sched.XMM, 1>,
                EVEX_V128;
  }
}

defm VGF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb,
                                          SchedWriteVecALU>,
                                          EVEX_CD8<8, CD8VF>, T8PD;

multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
                                      X86FoldableSchedWrite sched,
                                      X86VectorVTInfo VTI,
                                      X86VectorVTInfo BcstVTI>
           : avx512_3Op_rm_imm8<Op, OpStr, OpNode, sched, VTI, VTI> {
  let ExeDomain = VTI.ExeDomain in
  defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                (ins VTI.RC:$src1, VTI.ScalarMemOp:$src2, u8imm:$src3),
                OpStr, "$src3, ${src2}"#BcstVTI.BroadcastStr#", $src1",
                "$src1, ${src2}"#BcstVTI.BroadcastStr#", $src3",
                (OpNode (VTI.VT VTI.RC:$src1),
                 (bitconvert (BcstVTI.VT (X86VBroadcastld64 addr:$src2))),
                 (i8 timm:$src3))>, EVEX_B,
                Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
                                     X86SchedWriteWidths sched> {
  let Predicates = [HasGFNI, HasAVX512, HasBWI] in
  defm Z : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.ZMM,
                                      v64i8_info, v8i64_info>, EVEX_V512;
  let Predicates = [HasGFNI, HasVLX, HasBWI] in {
    defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.YMM,
                                           v32i8x_info, v4i64x_info>, EVEX_V256;
    defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.XMM,
                                           v16i8x_info, v2i64x_info>, EVEX_V128;
  }
}

defm VGF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb",
                                   X86GF2P8affineinvqb, SchedWriteVecIMul>,
                                   EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
defm VGF2P8AFFINEQB    : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb",
                                   X86GF2P8affineqb, SchedWriteVecIMul>,
                                   EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
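// For the affine instructions above, each qword of the second source holds an
// 8x8 bit matrix A and the immediate supplies a constant byte b: every byte x
// of the first source is mapped to A*x + b over GF(2). The INV form first
// replaces x with its multiplicative inverse in GF(2^8).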

//===----------------------------------------------------------------------===//
// AVX5124FMAPS
//===----------------------------------------------------------------------===//

let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedSingle,
    Constraints = "$src1 = $dst", Uses = [MXCSR], mayRaiseFPException = 1 in {
defm V4FMADDPSrm : AVX512_maskable_3src_in_asm<0x9A, MRMSrcMem, v16f32_info,
                    (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                    "v4fmaddps", "$src3, $src2", "$src2, $src3",
                    []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                    Sched<[SchedWriteFMA.ZMM.Folded]>;

defm V4FNMADDPSrm : AVX512_maskable_3src_in_asm<0xAA, MRMSrcMem, v16f32_info,
                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                     "v4fnmaddps", "$src3, $src2", "$src2, $src3",
                     []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                     Sched<[SchedWriteFMA.ZMM.Folded]>;

defm V4FMADDSSrm : AVX512_maskable_3src_in_asm<0x9B, MRMSrcMem, f32x_info,
                    (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
                    "v4fmaddss", "$src3, $src2", "$src2, $src3",
                    []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
                    Sched<[SchedWriteFMA.Scl.Folded]>;

defm V4FNMADDSSrm : AVX512_maskable_3src_in_asm<0xAB, MRMSrcMem, f32x_info,
                     (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
                     "v4fnmaddss", "$src3, $src2", "$src2, $src3",
                     []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
                     Sched<[SchedWriteFMA.Scl.Folded]>;
}

//===----------------------------------------------------------------------===//
// AVX5124VNNIW
//===----------------------------------------------------------------------===//

let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedInt,
    Constraints = "$src1 = $dst" in {
defm VP4DPWSSDrm : AVX512_maskable_3src_in_asm<0x52, MRMSrcMem, v16i32_info,
                    (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                    "vp4dpwssd", "$src3, $src2", "$src2, $src3",
                    []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                    Sched<[SchedWriteFMA.ZMM.Folded]>;

defm VP4DPWSSDSrm : AVX512_maskable_3src_in_asm<0x53, MRMSrcMem, v16i32_info,
                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                     "vp4dpwssds", "$src3, $src2", "$src2, $src3",
                     []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                     Sched<[SchedWriteFMA.ZMM.Folded]>;
}
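// The 4FMAPS and 4VNNIW instructions above consume a block of four sequential
// registers starting at $src2 together with a 16-byte memory operand; they are
// defined asm-only here (empty pattern lists).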

let hasSideEffects = 0 in {
  let mayStore = 1, SchedRW = [WriteFStoreX] in
  def MASKPAIR16STORE : PseudoI<(outs), (ins anymem:$dst, VK16PAIR:$src), []>;
  let mayLoad = 1, SchedRW = [WriteFLoadX] in
  def MASKPAIR16LOAD : PseudoI<(outs VK16PAIR:$dst), (ins anymem:$src), []>;
}

//===----------------------------------------------------------------------===//
// VP2INTERSECT
//===----------------------------------------------------------------------===//

multiclass avx512_vp2intersect_modes<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  def rr : I<0x68, MRMSrcReg,
             (outs _.KRPC:$dst),
             (ins _.RC:$src1, _.RC:$src2),
             !strconcat("vp2intersect", _.Suffix,
                        "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.KRPC:$dst, (X86vp2intersect
                                 _.RC:$src1, (_.VT _.RC:$src2)))]>,
             EVEX_4V, T8XD, Sched<[sched]>;

  def rm : I<0x68, MRMSrcMem,
             (outs _.KRPC:$dst),
             (ins _.RC:$src1, _.MemOp:$src2),
             !strconcat("vp2intersect", _.Suffix,
                        "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.KRPC:$dst, (X86vp2intersect
                                 _.RC:$src1, (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>,
             EVEX_4V, T8XD, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[sched.Folded, sched.ReadAfterFold]>;

  def rmb : I<0x68, MRMSrcMem,
              (outs _.KRPC:$dst),
              (ins _.RC:$src1, _.ScalarMemOp:$src2),
              !strconcat("vp2intersect", _.Suffix, "\t{${src2}", _.BroadcastStr,
                         ", $src1, $dst|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
              [(set _.KRPC:$dst, (X86vp2intersect
                                  _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))]>,
              EVEX_4V, T8XD, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_vp2intersect<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512, HasVP2INTERSECT] in
  defm Z : avx512_vp2intersect_modes<sched.ZMM, _.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVP2INTERSECT, HasVLX] in {
    defm Z256 : avx512_vp2intersect_modes<sched.YMM, _.info256>, EVEX_V256;
    defm Z128 : avx512_vp2intersect_modes<sched.XMM, _.info128>, EVEX_V128;
  }
}

defm VP2INTERSECTD : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i32_info>;
defm VP2INTERSECTQ : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i64_info>, VEX_W;
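// VP2INTERSECTD/Q write a mask-register pair: bit i of the first mask is set
// when element i of the first source equals some element of the second source,
// and the second mask records the matches in the other direction.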

multiclass avx512_binop_all2<bits<8> opc, string OpcodeStr,
                             X86SchedWriteWidths sched,
                             AVX512VLVectorVTInfo _SrcVTInfo,
                             AVX512VLVectorVTInfo _DstVTInfo,
                             SDNode OpNode, Predicate prd,
                             bit IsCommutable = 0> {
  let Predicates = [prd] in
    defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
                                   _SrcVTInfo.info512, _DstVTInfo.info512,
                                   _SrcVTInfo.info512, IsCommutable>,
                                   EVEX_V512, EVEX_CD8<32, CD8VF>;
  let Predicates = [HasVLX, prd] in {
    defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
                                      _SrcVTInfo.info256, _DstVTInfo.info256,
                                      _SrcVTInfo.info256, IsCommutable>,
                                      EVEX_V256, EVEX_CD8<32, CD8VF>;
    defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
                                      _SrcVTInfo.info128, _DstVTInfo.info128,
                                      _SrcVTInfo.info128, IsCommutable>,
                                      EVEX_V128, EVEX_CD8<32, CD8VF>;
  }
}

let ExeDomain = SSEPackedSingle in
defm VCVTNE2PS2BF16 : avx512_binop_all2<0x72, "vcvtne2ps2bf16",
                                        SchedWriteCvtPD2PS, //FIXME: Should be SchedWriteCvtPS2BF
                                        avx512vl_f32_info, avx512vl_i16_info,
                                        X86cvtne2ps2bf16, HasBF16, 0>, T8XD;

// Truncate Float to BFloat16
multiclass avx512_cvtps2bf16<bits<8> opc, string OpcodeStr,
                             X86SchedWriteWidths sched> {
  let ExeDomain = SSEPackedSingle in {
    let Predicates = [HasBF16], Uses = []<Register>, mayRaiseFPException = 0 in {
      defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i16x_info, v16f32_info,
                              X86cvtneps2bf16, X86cvtneps2bf16, sched.ZMM>, EVEX_V512;
    }
    let Predicates = [HasBF16, HasVLX] in {
      let Uses = []<Register>, mayRaiseFPException = 0 in {
        defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v4f32x_info,
                                   null_frag, null_frag, sched.XMM, "{1to4}", "{x}", f128mem,
                                   VK4WM>, EVEX_V128;
        defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v8f32x_info,
                                   X86cvtneps2bf16, X86cvtneps2bf16,
                                   sched.YMM, "{1to8}", "{y}">, EVEX_V256;
      }
    } // Predicates = [HasBF16, HasVLX]
  } // ExeDomain = SSEPackedSingle

  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
                   VR128X:$src), 0>;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst,
                   f128mem:$src), 0, "intel">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
                   VR256X:$src), 0>;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst,
                   f256mem:$src), 0, "intel">;
}

defm VCVTNEPS2BF16 : avx512_cvtps2bf16<0x72, "vcvtneps2bf16",
                                       SchedWriteCvtPD2PS>, T8XS,
                                       EVEX_CD8<32, CD8VF>;

let Predicates = [HasBF16, HasVLX] in {
  // Special patterns to allow use of X86mcvtneps2bf16 for masking. Instruction
  // patterns have been disabled with null_frag.
  def : Pat<(v8i16 (X86cvtneps2bf16 (v4f32 VR128X:$src))),
            (VCVTNEPS2BF16Z128rr VR128X:$src)>;
  def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), (v8i16 VR128X:$src0),
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src)>;
  def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), v8i16x_info.ImmAllZerosV,
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rrkz VK4WM:$mask, VR128X:$src)>;

  def : Pat<(v8i16 (X86cvtneps2bf16 (loadv4f32 addr:$src))),
            (VCVTNEPS2BF16Z128rm addr:$src)>;
  def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), (v8i16 VR128X:$src0),
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), v8i16x_info.ImmAllZerosV,
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rmkz VK4WM:$mask, addr:$src)>;

  def : Pat<(v8i16 (X86cvtneps2bf16 (v4f32
                                     (X86VBroadcastld32 addr:$src)))),
            (VCVTNEPS2BF16Z128rmb addr:$src)>;
  def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
                              (v8i16 VR128X:$src0), VK4WM:$mask),
            (VCVTNEPS2BF16Z128rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
                              v8i16x_info.ImmAllZerosV, VK4WM:$mask),
            (VCVTNEPS2BF16Z128rmbkz VK4WM:$mask, addr:$src)>;
}

let Constraints = "$src1 = $dst" in {
multiclass avx512_dpbf16ps_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86FoldableSchedWrite sched,
                              X86VectorVTInfo _, X86VectorVTInfo src_v> {
  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                               (ins src_v.RC:$src2, src_v.RC:$src3),
                               OpcodeStr, "$src3, $src2", "$src2, $src3",
                               (_.VT (OpNode _.RC:$src1, src_v.RC:$src2, src_v.RC:$src3))>,
                               EVEX_4V, Sched<[sched]>;

  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                               (ins src_v.RC:$src2, src_v.MemOp:$src3),
                               OpcodeStr, "$src3, $src2", "$src2, $src3",
                               (_.VT (OpNode _.RC:$src1, src_v.RC:$src2,
                               (src_v.LdFrag addr:$src3)))>, EVEX_4V,
                               Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                                (ins src_v.RC:$src2, src_v.ScalarMemOp:$src3),
                                OpcodeStr,
                                !strconcat("${src3}", _.BroadcastStr, ", $src2"),
                                !strconcat("$src2, ${src3}", _.BroadcastStr),
                                (_.VT (OpNode _.RC:$src1, src_v.RC:$src2,
                                (src_v.VT (src_v.BroadcastLdFrag addr:$src3))))>,
                                EVEX_B, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
} // Constraints = "$src1 = $dst"

multiclass avx512_dpbf16ps_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _,
                                 AVX512VLVectorVTInfo src_v, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512,
                                src_v.info512>, EVEX_V512;
  }
  let Predicates = [HasVLX, prd] in {
    defm Z256 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256,
                                   src_v.info256>, EVEX_V256;
    defm Z128 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128,
                                   src_v.info128>, EVEX_V128;
  }
}

let ExeDomain = SSEPackedSingle in
defm VDPBF16PS : avx512_dpbf16ps_sizes<0x52, "vdpbf16ps", X86dpbf16ps, SchedWriteFMA,
                                       avx512vl_f32_info, avx512vl_i32_info,
                                       HasBF16>, T8XS, EVEX_CD8<32, CD8VF>;
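// VDPBF16PS multiplies adjacent pairs of BF16 elements from its two sources
// and accumulates the products into the corresponding f32 elements of the
// destination.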