//===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 AVX512 instruction set, defining the
// instructions, and properties of the instructions which are needed for code
// generation, machine code emission, and analysis.
//
//===----------------------------------------------------------------------===//

// Group template arguments that can be derived from the vector type (EltNum x
// EltVT).  These are things like the register class for the writemask, etc.
// The idea is to pass one of these as the template argument rather than the
// individual arguments.
// The template is also used for scalar types, in this case numelts is 1.
//
// Most fields are looked up by name with !cast, so a record with the
// constructed name (e.g. "VK" # NumElts) must exist for every instantiation.
class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
                      string suffix = ""> {
  // The template arguments, re-exported as fields.
  RegisterClass RC = rc;
  ValueType EltVT = eltvt;
  int NumElts = numelts;

  // Corresponding mask register class.
  RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);

  // Corresponding mask register pair class.
  // Left unset (?) when NumElts is greater than 16.
  RegisterOperand KRPC = !if (!gt(NumElts, 16), ?,
                              !cast<RegisterOperand>("VK" # NumElts # "Pair"));

  // Corresponding write-mask register class.
  RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");

  // The mask VT.
  ValueType KVT = !cast<ValueType>("v" # NumElts # "i1");

  // Suffix used in the instruction mnemonic.
  string Suffix = suffix;

  // VTName is a string name for vector VT. For vector types it will be
  // v # NumElts # EltVT, so for vector of 8 elements of i32 it will be v8i32
  // It is a little bit complex for scalar types, where NumElts = 1.
  // In this case we build v4f32 or v2f64
  // More precisely, for NumElts = 1 the element count is chosen from the
  // element size: 8 for 16-bit, 4 for 32-bit and 2 for 64-bit elements. Any
  // other element size keeps NumElts.
  string VTName = "v" # !if (!eq (NumElts, 1),
                        !if (!eq (EltVT.Size, 16), 8,
                        !if (!eq (EltVT.Size, 32), 4,
                        !if (!eq (EltVT.Size, 64), 2, NumElts))), NumElts) # EltVT;

  // The vector VT.
  ValueType VT = !cast<ValueType>(VTName);

  // Element type spelled as a string, e.g. "f32".
  string EltTypeName = !cast<string>(EltVT);
  // Size of the element type in bits, e.g. 32 for v16i32.
  // Obtained as a string by deleting "f" and "i" from EltTypeName.
  string EltSizeName = !subst("i", "", !subst("f", "", EltTypeName));
  int EltSize = EltVT.Size;

  // "i" for integer types and "f" for floating-point types
  string TypeVariantName = !subst(EltSizeName, "", EltTypeName);

  // Size of RC in bits, e.g. 512 for VR512.
  // This is taken from VT rather than RC, so the scalar infos (NumElts = 1)
  // report the size of the vector type selected by VTName above.
  int Size = VT.Size;

  // The corresponding memory operand, e.g. i512mem for VR512.
  X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
  // Memory operand for a single element: EltVT # "mem".
  X86MemOperand ScalarMemOp = !cast<X86MemOperand>(EltVT # "mem");
  // FP scalar memory operand for intrinsics - shmem/ssmem/sdmem.
  // Unset (?) for element types other than f16/f32/f64.
  Operand IntScalarMemOp = !if (!eq (EltTypeName, "f16"), !cast<Operand>("shmem"),
                           !if (!eq (EltTypeName, "f32"), !cast<Operand>("ssmem"),
                           !if (!eq (EltTypeName, "f64"), !cast<Operand>("sdmem"), ?)));

  // Load patterns
  PatFrag LdFrag = !cast<PatFrag>("load" # VTName);

  PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" # VTName);

  PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);
  PatFrag BroadcastLdFrag = !cast<PatFrag>("X86VBroadcastld" # EltSizeName);

  // Scalar-intrinsic load fragments; unset (?) for element types other than
  // f16/f32/f64.
  PatFrags ScalarIntMemFrags = !if (!eq (EltTypeName, "f16"), !cast<PatFrags>("sse_load_f16"),
                               !if (!eq (EltTypeName, "f32"), !cast<PatFrags>("sse_load_f32"),
                               !if (!eq (EltTypeName, "f64"), !cast<PatFrags>("sse_load_f64"), ?)));

  // The string to specify embedded broadcast in assembly.
  string BroadcastStr = "{1to" # NumElts # "}";

  // 8-bit compressed displacement tuple/subvector format.  This is only
  // defined for NumElts <= 8.
  // (The guard below is NumElts >> 4 == 0, i.e. NumElts < 16; the field is
  // unset otherwise.)
  CD8VForm CD8TupleForm = !if (!eq (!srl(NumElts, 4), 0),
                               !cast<CD8VForm>("CD8VT" # NumElts), ?);

  // sub_xmm for 128-bit types, sub_ymm for 256-bit types, unset otherwise.
  SubRegIndex SubRegIdx = !if (!eq (Size, 128), sub_xmm,
                          !if (!eq (Size, 256), sub_ymm, ?));

  // Execution domain chosen from the element type; every non-FP element type
  // gets SSEPackedInt.
  Domain ExeDomain = !if (!eq (EltTypeName, "f32"), SSEPackedSingle,
                     !if (!eq (EltTypeName, "f64"), SSEPackedDouble,
                     !if (!eq (EltTypeName, "f16"), SSEPackedSingle, // FIXME?
                     SSEPackedInt)));

  // FR32X for f32, FR16X for f16, and FR64X for every other element type
  // (including the integer ones).
  RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X,
                      !if (!eq (EltTypeName, "f16"), FR16X,
                      FR64X));

  // All-zeros vector of type VT, used as the false operand of zero-masking
  // selects.
  dag ImmAllZerosV = (VT immAllZerosV);

  // "Z128" / "Z256" for 128- / 256-bit types, "Z" for everything else.
  string ZSuffix = !if (!eq (Size, 128), "Z128",
                   !if (!eq (Size, 256), "Z256", "Z"));
}

// 512-bit vector types (RC = VR512).
def v64i8_info  : X86VectorVTInfo<64,  i8, VR512, "b">;
def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">;
def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
def v8i64_info  : X86VectorVTInfo<8,  i64, VR512, "q">;
def v32f16_info : X86VectorVTInfo<32, f16, VR512, "ph">;
def v16f32_info : X86VectorVTInfo<16, f32, VR512, "ps">;
def v8f64_info  : X86VectorVTInfo<8,  f64, VR512, "pd">;

// "x" in v32i8x_info means RC = VR256X
def v32i8x_info  : X86VectorVTInfo<32,  i8, VR256X, "b">;
def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
def v8i32x_info  : X86VectorVTInfo<8,  i32, VR256X, "d">;
def v4i64x_info  : X86VectorVTInfo<4,  i64, VR256X, "q">;
def v16f16x_info : X86VectorVTInfo<16, f16, VR256X, "ph">;
def v8f32x_info  : X86VectorVTInfo<8,  f32, VR256X, "ps">;
def v4f64x_info  : X86VectorVTInfo<4,  f64, VR256X, "pd">;

// 128-bit vector types; here the "x" suffix means RC = VR128X.
def v16i8x_info  : X86VectorVTInfo<16,  i8, VR128X, "b">;
def v8i16x_info  : X86VectorVTInfo<8,  i16, VR128X, "w">;
def v4i32x_info  : X86VectorVTInfo<4,  i32, VR128X, "d">;
def v2i64x_info  : X86VectorVTInfo<2,  i64, VR128X, "q">;
def v8f16x_info  : X86VectorVTInfo<8,  f16, VR128X, "ph">;
def v4f32x_info  : X86VectorVTInfo<4,  f32, VR128X, "ps">;
def v2f64x_info  : X86VectorVTInfo<2,  f64, VR128X, "pd">;

// We map scalar types to the smallest (128-bit) vector type
// with the appropriate element type. This allows to use the same masking logic.
// Note that the integer scalars use general-purpose register classes
// (GR32/GR64) while the FP scalars use VR128X.
def i32x_info    : X86VectorVTInfo<1,  i32, GR32, "si">;
def i64x_info    : X86VectorVTInfo<1,  i64, GR64, "sq">;
def f16x_info    : X86VectorVTInfo<1,  f16, VR128X, "sh">;
def f32x_info    : X86VectorVTInfo<1,  f32, VR128X, "ss">;
def f64x_info    : X86VectorVTInfo<1,  f64, VR128X, "sd">;

// Bundles the 512-, 256- and 128-bit X86VectorVTInfo records that share one
// element type, so a multiclass can take a single argument for all three
// vector lengths.
class AVX512VLVectorVTInfo<X86VectorVTInfo i512, X86VectorVTInfo i256,
                           X86VectorVTInfo i128> {
  X86VectorVTInfo info512 = i512;
  X86VectorVTInfo info256 = i256;
  X86VectorVTInfo info128 = i128;
}

def avx512vl_i8_info  : AVX512VLVectorVTInfo<v64i8_info, v32i8x_info,
                                             v16i8x_info>;
def avx512vl_i16_info : AVX512VLVectorVTInfo<v32i16_info, v16i16x_info,
                                             v8i16x_info>;
def avx512vl_i32_info : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info,
                                             v4i32x_info>;
def avx512vl_i64_info : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info,
                                             v2i64x_info>;
def avx512vl_f16_info : AVX512VLVectorVTInfo<v32f16_info, v16f16x_info,
                                             v8f16x_info>;
def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info,
                                             v4f32x_info>;
def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info,
                                             v2f64x_info>;

// Type information for a mask (vNi1) value: its register class, its
// write-mask register class and its value type.
class X86KVectorVTInfo<RegisterClass _krc, RegisterClass _krcwm,
                       ValueType _vt> {
  RegisterClass KRC = _krc;
  RegisterClass KRCWM = _krcwm;
  ValueType KVT = _vt;
}

def v1i1_info : X86KVectorVTInfo<VK1, VK1WM, v1i1>;
def v2i1_info : X86KVectorVTInfo<VK2, VK2WM, v2i1>;
def v4i1_info : X86KVectorVTInfo<VK4, VK4WM, v4i1>;
def v8i1_info : X86KVectorVTInfo<VK8, VK8WM, v8i1>;
def v16i1_info : X86KVectorVTInfo<VK16, VK16WM, v16i1>;
def v32i1_info : X86KVectorVTInfo<VK32, VK32WM, v32i1>;
def v64i1_info : X86KVectorVTInfo<VK64, VK64WM, v64i1>;

// Used for matching masked operations. Ensures the operation part only has a
// single use.
def vselect_mask
    : PatFrag<(ops node:$mask, node:$src1, node:$src2),
              (vselect node:$mask, node:$src1, node:$src2), [{
  return isProfitableToFormMaskedOp(N);
}]>;

// Same predicate as vselect_mask, applied to the X86selects node instead of
// vselect.
def X86selects_mask
    : PatFrag<(ops node:$mask, node:$src1, node:$src2),
              (X86selects node:$mask, node:$src1, node:$src2), [{
  return isProfitableToFormMaskedOp(N);
}]>;

// Assembly-level skeleton shared by all maskable instructions.  From a single
// set of asm pieces it produces three records:
//   NAME    - the unmasked instruction,
//   NAME#k  - the merge-masking instruction (EVEX_K),
//   NAME#kz - the zero-masking instruction (EVEX_KZ).
// Operand lists and ISel patterns for every variant are passed in by the
// caller; nothing about the masking patterns is derived here.
multiclass AVX512_maskable_custom<
    bits<8> O, Format F,
    dag Outs,
    dag Ins, dag MaskingIns, dag ZeroMaskingIns,
    string OpcodeStr,
    string AttSrcAsm, string IntelSrcAsm,
    list<dag> Pattern,
    list<dag> MaskingPattern,
    list<dag> ZeroMaskingPattern,
    string MaskingConstraint = "",
    bit IsCommutable = 0,
    bit IsKCommutable = 0,
    bit IsKZCommutable = IsCommutable,
    string ClobberConstraint = ""> {
  // Unmasked variant.
  let isCommutable = IsCommutable, Constraints = ClobberConstraint in
  def NAME : AVX512<O, F, Outs, Ins,
                    OpcodeStr # "\t{" # AttSrcAsm # ", $dst|" #
                        "$dst, " # IntelSrcAsm # "}",
                    Pattern>;

  // Merge-masking variant.
  // Prefer over VMOV*rrk Pat<>
  let isCommutable = IsKCommutable in
  def NAME#k : AVX512<O, F, Outs, MaskingIns,
                      OpcodeStr # "\t{" # AttSrcAsm # ", $dst {${mask}}|" #
                          "$dst {${mask}}, " # IntelSrcAsm # "}",
                      MaskingPattern>,
               EVEX_K {
    // Whichever of ClobberConstraint / MaskingConstraint is non-empty, or both
    // joined with ", ".  In the 3src subclasses this field is overridden with
    // a let.
    string Constraints =
        !if(!eq(ClobberConstraint, ""), MaskingConstraint,
            !if(!eq(MaskingConstraint, ""), ClobberConstraint,
                ClobberConstraint # ", " # MaskingConstraint));
  }

  // Zero-masking variant.  Zeroing places no extra restriction on commuting
  // the operands, which is why IsKZCommutable defaults to IsCommutable rather
  // than to IsKCommutable.
  // Prefer over VMOV*rrkz Pat<>
  let isCommutable = IsKZCommutable, Constraints = ClobberConstraint in
  def NAME#kz : AVX512<O, F, Outs, ZeroMaskingIns,
                       OpcodeStr # "\t{" # AttSrcAsm # ", $dst {${mask}} {z}|" #
                           "$dst {${mask}} {z}, " # IntelSrcAsm # "}",
                       ZeroMaskingPattern>,
                EVEX_KZ;
}


// Shared base of AVX512_maskable and AVX512_maskable_3src.  The caller supplies
// the unmasked RHS and the merge-masking RHS; the zero-masking pattern is
// formed here as Select(mask, RHS, all-zeros).
multiclass AVX512_maskable_common<
    bits<8> O, Format F, X86VectorVTInfo _,
    dag Outs,
    dag Ins, dag MaskingIns, dag ZeroMaskingIns,
    string OpcodeStr,
    string AttSrcAsm, string IntelSrcAsm,
    dag RHS, dag MaskingRHS,
    SDPatternOperator Select = vselect_mask,
    string MaskingConstraint = "",
    bit IsCommutable = 0,
    bit IsKCommutable = 0,
    bit IsKZCommutable = IsCommutable,
    string ClobberConstraint = "">
    : AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns,
                             OpcodeStr, AttSrcAsm, IntelSrcAsm,
                             [(set _.RC:$dst, RHS)],
                             [(set _.RC:$dst, MaskingRHS)],
                             [(set _.RC:$dst,
                                   (Select _.KRCWM:$mask, RHS,
                                           _.ImmAllZerosV))],
                             MaskingConstraint, IsCommutable,
                             IsKCommutable, IsKZCommutable,
                             ClobberConstraint>;

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction.  In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
// This version uses a separate dag for non-masking and masking.
// Note: ClobberConstraint comes before the commutability bits in this
// template list, whereas AVX512_maskable takes it last.
multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS, dag MaskRHS,
                           string ClobberConstraint = "",
                           bit IsCommutable = 0, bit IsKCommutable = 0,
                           bit IsKZCommutable = IsCommutable> :
   AVX512_maskable_custom<O, F, Outs, Ins,
                          // Merge-masking operands: $src0 (the pass-through
                          // value) and $mask are prepended to Ins.
                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                          // Zero-masking operands: only $mask is prepended.
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
                          // The unmasked pattern uses RHS; both masked
                          // patterns use MaskRHS under vselect_mask.
                          [(set _.RC:$dst, RHS)],
                          [(set _.RC:$dst,
                              (vselect_mask _.KRCWM:$mask, MaskRHS, _.RC:$src0))],
                          [(set _.RC:$dst,
                              (vselect_mask _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))],
                          // $src0 is tied to $dst in the merge-masking form.
                          "$src0 = $dst", IsCommutable, IsKCommutable,
                          IsKZCommutable, ClobberConstraint>;

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction.  In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
// The same RHS is used for the unmasked and the masked patterns; Select is the
// select-like operator wrapped around it (vselect_mask unless overridden).
multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS,
                           bit IsCommutable = 0, bit IsKCommutable = 0,
                           bit IsKZCommutable = IsCommutable,
                           SDPatternOperator Select = vselect_mask,
                           string ClobberConstraint = ""> :
   AVX512_maskable_common<O, F, _, Outs, Ins,
                          // Merge-masking / zero-masking operand lists.
                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                          // Merge-masking RHS: unselected elements come from
                          // $src0.
                          (Select _.KRCWM:$mask, RHS, _.RC:$src0),
                          Select, "$src0 = $dst", IsCommutable, IsKCommutable,
                          IsKZCommutable, ClobberConstraint>;

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the scalar instruction.
multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS> :
   // The three 0s are IsCommutable, IsKCommutable and IsKZCommutable;
   // X86selects_mask replaces the default vselect_mask as Select.
   AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm,
                   RHS, 0, 0, 0, X86selects_mask>;

// Similar to AVX512_maskable but in this case one of the source operands
// ($src1) is already tied to $dst so we just use that for the preserved
// vector elements.  NOTE that the NonTiedIns (the ins dag) should exclude
// $src1.
// When MaskOnly is set, null_frag is passed as the RHS, which replaces the
// unmasked pattern.  AVX512_maskable_common builds the zero-masking pattern
// from that same RHS, so it embeds null_frag as well.
// NOTE(review): presumably that disables the zero-masking pattern too - confirm.
multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
                                dag Outs, dag NonTiedIns, string OpcodeStr,
                                string AttSrcAsm, string IntelSrcAsm,
                                dag RHS,
                                bit IsCommutable = 0,
                                bit IsKCommutable = 0,
                                SDPatternOperator Select = vselect_mask,
                                bit MaskOnly = 0> :
   AVX512_maskable_common<O, F, _, Outs,
                          // $src1 leads every operand list; the two masked
                          // forms use the same (ins $src1, $mask, ...) list.
                          !con((ins _.RC:$src1), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
                          !if(MaskOnly, (null_frag), RHS),
                          // Merge-masking keeps the unselected elements of
                          // $src1.
                          (Select _.KRCWM:$mask, RHS, _.RC:$src1),
                          // No masking constraint is passed (""), and
                          // IsKZCommutable is left at its default, which is
                          // IsCommutable.
                          Select, "", IsCommutable, IsKCommutable>;

// Similar to AVX512_maskable_3src but in this case the input VT for the tied
// operand differs from the output VT. This requires a bitconvert on
// the preserved vector going into the vselect.
// NOTE: The unmasked pattern is disabled.
// The mask register class of the instruction operands comes from InVT
// (InVT.KRCWM), while the result register class and the zero vector come from
// OutVT.  null_frag is passed as the RHS to AVX512_maskable_common, which also
// builds the zero-masking pattern from it.
// NOTE(review): presumably the zero-masking pattern is thereby disabled as
// well - confirm.
multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
                                     X86VectorVTInfo InVT,
                                     dag Outs, dag NonTiedIns, string OpcodeStr,
                                     string AttSrcAsm, string IntelSrcAsm,
                                     dag RHS, bit IsCommutable = 0> :
   AVX512_maskable_common<O, F, OutVT, Outs,
                          !con((ins InVT.RC:$src1), NonTiedIns),
                          !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
                          !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, (null_frag),
                          // Merge-masking: the preserved value is $src1
                          // reinterpreted through a bitconvert.
                          (vselect_mask InVT.KRCWM:$mask, RHS,
                           (bitconvert InVT.RC:$src1)),
                           vselect_mask, "", IsCommutable>;

// Scalar flavour of AVX512_maskable_3src: identical forwarding, with
// X86selects_mask as the select operator.
multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                                     dag Outs, dag NonTiedIns, string OpcodeStr,
                                     string AttSrcAsm, string IntelSrcAsm,
                                     dag RHS,
                                     bit IsCommutable = 0,
                                     bit IsKCommutable = 0,
                                     bit MaskOnly = 0> :
   AVX512_maskable_3src<O, F, _, Outs, NonTiedIns, OpcodeStr, AttSrcAsm,
                        IntelSrcAsm, RHS, IsCommutable, IsKCommutable,
                        X86selects_mask, MaskOnly>;

// Defines the unmasked, masked and zero-masked records, but passes empty
// pattern lists for the two masked forms; only the unmasked form receives the
// caller's Pattern.  $src0 is tied to $dst.
multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs, dag Ins,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern> :
   AVX512_maskable_custom<O, F, Outs, Ins,
                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
                          "$src0 = $dst">;

// As AVX512_maskable_in_asm, but with the 3src operand layout: $src1 leads
// every operand list and no masking constraint is passed.
multiclass AVX512_maskable_3src_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
                                       dag Outs, dag NonTiedIns,
                                       string OpcodeStr,
                                       string AttSrcAsm, string IntelSrcAsm,
                                       list<dag> Pattern> :
   AVX512_maskable_custom<O, F, Outs,
                          !con((ins _.RC:$src1), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
                          "">;

// Instruction with mask that puts result in mask register,
// like "compare" and "vptest"
// Produces the unmasked record NAME and the masked record NAME#k (EVEX_K).
// There is no zero-masking record in this family.  Both records share the
// IsCommutable setting.
multiclass AVX512_maskable_custom_cmp<
    bits<8> O, Format F,
    dag Outs,
    dag Ins, dag MaskingIns,
    string OpcodeStr,
    string AttSrcAsm, string IntelSrcAsm,
    list<dag> Pattern,
    list<dag> MaskingPattern,
    bit IsCommutable = 0> {
  // Unmasked form.
  let isCommutable = IsCommutable in
  def NAME : AVX512<O, F, Outs, Ins,
                    OpcodeStr # "\t{" # AttSrcAsm # ", $dst|" #
                        "$dst, " # IntelSrcAsm # "}",
                    Pattern>;

  // Masked form.
  let isCommutable = IsCommutable in
  def NAME#k : AVX512<O, F, Outs, MaskingIns,
                      OpcodeStr # "\t{" # AttSrcAsm # ", $dst {${mask}}|" #
                          "$dst {${mask}}, " # IntelSrcAsm # "}",
                      MaskingPattern>,
               EVEX_K;
}

// Wraps the caller's RHS dags into patterns that set a mask-register ($dst of
// class _.KRC) result.
multiclass AVX512_maskable_common_cmp<
    bits<8> O, Format F, X86VectorVTInfo _,
    dag Outs,
    dag Ins, dag MaskingIns,
    string OpcodeStr,
    string AttSrcAsm, string IntelSrcAsm,
    dag RHS, dag MaskingRHS,
    bit IsCommutable = 0>
    : AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
                                 AttSrcAsm, IntelSrcAsm,
                                 [(set _.KRC:$dst, RHS)],
                                 [(set _.KRC:$dst, MaskingRHS)],
                                 IsCommutable>;

// Entry point of the family: the masked operand list is $mask followed by Ins,
// and the masked pattern is the AND of $mask with RHS_su.
multiclass AVX512_maskable_cmp<
    bits<8> O, Format F, X86VectorVTInfo _,
    dag Outs, dag Ins, string OpcodeStr,
    string AttSrcAsm, string IntelSrcAsm,
    dag RHS, dag RHS_su, bit IsCommutable = 0>
    : AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
                                 !con((ins _.KRCWM:$mask), Ins),
                                 OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                                 (and _.KRCWM:$mask, RHS_su),
                                 IsCommutable>;

// Used by conversion instructions.
// Unlike AVX512_maskable, the caller supplies all three operand lists and all
// three right-hand sides explicitly; only the "$src0 = $dst" tie is fixed here.
multiclass AVX512_maskable_cvt<bits<8> O, Format F, X86VectorVTInfo _,
                               dag Outs,
                               dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                               string OpcodeStr,
                               string AttSrcAsm, string IntelSrcAsm,
                               dag RHS, dag MaskingRHS, dag ZeroMaskingRHS> :
  AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
                         AttSrcAsm, IntelSrcAsm,
                         [(set _.RC:$dst, RHS)],
                         [(set _.RC:$dst, MaskingRHS)],
                         [(set _.RC:$dst, ZeroMaskingRHS)],
                         "$src0 = $dst">;

// 3src-style helper with separate dags for the unmasked (RHS) and masked
// (MaskingRHS) patterns.  $src1 leads every operand list and supplies the
// preserved elements in the merge-masking pattern.  IsCommutable and
// IsKCommutable have no defaults here; IsKZCommutable is not passed on, so it
// takes AVX512_maskable_custom's default (IsCommutable).
multiclass AVX512_maskable_fma<bits<8> O, Format F, X86VectorVTInfo _,
                               dag Outs, dag NonTiedIns, string OpcodeStr,
                               string AttSrcAsm, string IntelSrcAsm,
                               dag RHS, dag MaskingRHS, bit IsCommutable,
                               bit IsKCommutable> :
   AVX512_maskable_custom<O, F, Outs,
                          !con((ins _.RC:$src1), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
                          [(set _.RC:$dst, RHS)],
                          [(set _.RC:$dst,
                            (vselect_mask _.KRCWM:$mask, MaskingRHS, _.RC:$src1))],
                          [(set _.RC:$dst,
                            (vselect_mask _.KRCWM:$mask, MaskingRHS, _.ImmAllZerosV))],
                          "", IsCommutable, IsKCommutable>;

// Alias instruction that maps zero vector to pxor / xorp* for AVX-512.
// This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
// swizzled by ExecutionDomainFix to pxor.
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
// Both pseudos carry a v16i32 pattern; the other 512-bit types are mapped onto
// AVX512_512_SET0 by the Pat<> records that follow.
def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
               [(set VR512:$dst, (v16i32 immAllZerosV))]>;
def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
               [(set VR512:$dst, (v16i32 immAllOnesV))]>;
}

// All-zeros constants of the remaining 512-bit types (v32f16 is handled under
// HasFP16 further down).
let Predicates = [HasAVX512] in {
def : Pat<(v64i8 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v32i16 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v8i64 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v16f32 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>;
}

// Alias instructions that allow VPTERNLOG to be used with a mask to create
// a mix of all ones and all zeros elements. This is done this way to force
// the same register to be used as input for all three sources.
let isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteVecALU] in {
// 16 x i32 lanes selected by a v16i1 mask.
def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst),
                                (ins VK16WM:$mask), "",
                           [(set VR512:$dst, (vselect (v16i1 VK16WM:$mask),
                                                      (v16i32 immAllOnesV),
                                                      (v16i32 immAllZerosV)))]>;
// 8 x i64 lanes selected by a v8i1 mask.
def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
                                (ins VK8WM:$mask), "",
                [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
                                           (v8i64 immAllOnesV),
                                           (v8i64 immAllZerosV)))]>;
}

// 128- and 256-bit zero pseudos on the extended (VR128X / VR256X) register
// classes, with the same flags as AVX512_512_SET0.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
               [(set VR128X:$dst, (v4i32 immAllZerosV))]>;
def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
               [(set VR256X:$dst, (v8i32 immAllZerosV))]>;
}

// All-zeros constants of the remaining 128-bit and 256-bit types.
let Predicates = [HasAVX512] in {
def : Pat<(v8i16 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v16i8 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v2i64 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v4f32 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v2f64 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v32i8 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v16i16 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v4i64 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v8f32 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v4f64 immAllZerosV), (AVX512_256_SET0)>;
}

// Half-precision all-zeros constants reuse the same pseudos, gated on HasFP16.
let Predicates = [HasFP16] in {
def : Pat<(v8f16 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v16f16 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v32f16 immAllZerosV), (AVX512_512_SET0)>;
}

// Alias instructions that map fld0 to xorps for sse or vxorps for avx.
// This is expanded by ExpandPostRAPseudos.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in {
  def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
                          [(set FR32X:$dst, fp32imm0)]>;
  def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
                          [(set FR64X:$dst, fp64imm0)]>;
  def AVX512_FsFLD0F128 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
                            [(set VR128X:$dst, fp128imm0)]>;
}

// Half-precision scalar zero; same flags as above but gated on HasFP16.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasFP16] in {
  def AVX512_FsFLD0SH : I<0, Pseudo, (outs FR16X:$dst), (ins), "",
                          [(set FR16X:$dst, fp16imm0)]>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - VECTOR INSERT
//

// Supports two different pattern operators for mask and unmasked ops. Allows
// null_frag to be passed for one.
// From describes the inserted subvector and To the full vector.  The mnemonic
// is "vinsert" # From.EltTypeName # "x" # From.NumElts, e.g. "vinsertf32x4"
// when From is 4 x f32.  vinsert_insert is used for the unmasked pattern and
// vinsert_for_mask for the masked ones.
multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From,
                                  X86VectorVTInfo To,
                                  SDPatternOperator vinsert_insert,
                                  SDPatternOperator vinsert_for_mask,
                                  X86FoldableSchedWrite sched> {
  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    // Register form: $src2 is a From.RC register.
    defm rr : AVX512_maskable_split<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
                   (ins To.RC:$src1, From.RC:$src2, u8imm:$src3),
                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
                   "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
                                         (From.VT From.RC:$src2),
                                         (iPTR imm)),
                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
                                           (From.VT From.RC:$src2),
                                           (iPTR imm))>,
                   AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
    // Memory form: $src2 is loaded through From.LdFrag, and the compressed
    // displacement is described by From.EltSize and From.CD8TupleForm.
    let mayLoad = 1 in
    defm rm : AVX512_maskable_split<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
                   (ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3),
                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
                   "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
                               (From.VT (From.LdFrag addr:$src2)),
                               (iPTR imm)),
                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
                               (From.VT (From.LdFrag addr:$src2)),
                               (iPTR imm))>, AVX512AIi8Base, EVEX_4V,
                   EVEX_CD8<From.EltSize, From.CD8TupleForm>,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Passes the same pattern operator for masked and unmasked ops.
multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From,
                            X86VectorVTInfo To,
                            SDPatternOperator vinsert_insert,
                            X86FoldableSchedWrite sched> :
  vinsert_for_size_split<Opcode, From, To, vinsert_insert, vinsert_insert, sched>;

// Adds unmasked selection patterns that map an insert on the (From, To) type
// pair onto already-defined instructions named InstrStr#"rr" (register source)
// and InstrStr#"rm" (load source).  The instruction immediate is computed from
// the matched node ($ins) by INSERT_get_vinsert_imm.  p is the predicate list
// guarding both patterns.
multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                       X86VectorVTInfo To, PatFrag vinsert_insert,
                       SDNodeXForm INSERT_get_vinsert_imm , list<Predicate> p> {
  let Predicates = p in {
    def : Pat<(vinsert_insert:$ins
                     (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rr")
                     To.RC:$src1, From.RC:$src2,
                     (INSERT_get_vinsert_imm To.RC:$ins)))>;

    def : Pat<(vinsert_insert:$ins
                  (To.VT To.RC:$src1),
                  (From.VT (From.LdFrag addr:$src2)),
                  (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rm")
                  To.RC:$src1, addr:$src2,
                  (INSERT_get_vinsert_imm To.RC:$ins)))>;
  }
}

// Instantiates every vinsert form for one element-type family.  EltVT32 /
// EltVT64 are the 32- and 64-bit element types; Opcode128 / Opcode256 are the
// opcodes for inserting a 128-bit / 256-bit subvector.  The X86VectorVTInfo
// arguments are anonymous instantiations without a mnemonic suffix.
multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
                            ValueType EltVT64, int Opcode256,
                            X86FoldableSchedWrite sched> {

  // 128-bit into 256-bit, 32-bit elements.
  let Predicates = [HasVLX] in
    defm NAME # "32x4Z256" : vinsert_for_size<Opcode128,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 vinsert128_insert, sched>, EVEX_V256;

  // 128-bit into 512-bit, 32-bit elements.
  defm NAME # "32x4Z" : vinsert_for_size<Opcode128,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 vinsert128_insert, sched>, EVEX_V512;

  // 256-bit into 512-bit, 64-bit elements.
  defm NAME # "64x4Z" : vinsert_for_size<Opcode256,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 vinsert256_insert, sched>, VEX_W, EVEX_V512;

  // Even with DQI we'd like to only use these instructions for masking.
  // (null_frag is passed as the unmasked operator.)
  let Predicates = [HasVLX, HasDQI] in
    defm NAME # "64x2Z256" : vinsert_for_size_split<Opcode128,
                                   X86VectorVTInfo< 2, EltVT64, VR128X>,
                                   X86VectorVTInfo< 4, EltVT64, VR256X>,
                                   null_frag, vinsert128_insert, sched>,
                                   VEX_W1X, EVEX_V256;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z" : vinsert_for_size_split<Opcode128,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 null_frag, vinsert128_insert, sched>,
                                 VEX_W, EVEX_V512;

    defm NAME # "32x8Z" : vinsert_for_size_split<Opcode256,
                                   X86VectorVTInfo< 8, EltVT32, VR256X>,
                                   X86VectorVTInfo<16, EltVT32, VR512>,
                                   null_frag, vinsert256_insert, sched>,
                                   EVEX_V512;
  }
}

// FIXME: Is there a better scheduler class for VINSERTF/VINSERTI?
defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a, WriteFShuffle256>;
defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a, WriteShuffle256>;

// Codegen pattern with the alternative types,
// Even with AVX512DQ we'll still use these for unmasked operations.
defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;

defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;

defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;

// Codegen pattern with the alternative types insert VEC128 into VEC256
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v8f16x_info, v16f16x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasFP16, HasVLX]>;
// Codegen pattern with the alternative types insert VEC128 into VEC512
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
               vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v8f16x_info, v32f16_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasFP16]>;
// Codegen pattern with the alternative types insert VEC256 into VEC512
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v16f16x_info, v32f16_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasFP16]>;


// Masked-insert patterns for the case where the select is performed on a
// different element type (Cast) than the insert itself (From into To), so the
// insert result reaches the select through a bitconvert.  Four patterns are
// emitted, selecting InstrStr # "rrk" / "rmk" (merge-masking, preserved value
// $src0) and InstrStr # "rrkz" / "rmkz" (zero-masking).  The mask register
// class comes from Cast; the immediate is derived from the matched insert
// ($ins) by INSERT_get_vinsert_imm.  p guards all four patterns.
multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                 X86VectorVTInfo To, X86VectorVTInfo Cast,
                                 PatFrag vinsert_insert,
                                 SDNodeXForm INSERT_get_vinsert_imm,
                                 list<Predicate> p> {
let Predicates = p in {
  // Merge-masking, register source.
  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT From.RC:$src2),
                                                 (iPTR imm))),
                           Cast.RC:$src0)),
            (!cast<Instruction>(InstrStr#"rrk")
             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
  // Merge-masking, load source.  The load is matched directly as From.VT, the
  // same way the zero-masking load pattern below does; From.LdFrag already
  // yields From.VT, so no bitconvert belongs between the two.
  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT (From.LdFrag addr:$src2)),
                                                 (iPTR imm))),
                           Cast.RC:$src0)),
            (!cast<Instruction>(InstrStr#"rmk")
             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;

  // Zero-masking, register source.
  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT From.RC:$src2),
                                                 (iPTR imm))),
                           Cast.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#"rrkz")
             Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
  // Zero-masking, load source.
  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT (From.LdFrag addr:$src2)),
                                                 (iPTR imm))),
                           Cast.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#"rmkz")
             Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
}
}

// 128-bit into 256-bit, floating point.
defm : vinsert_for_mask_cast<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
                             v8f32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4f32x_info, v8f32x_info,
                             v4f64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;

// 128-bit into 256-bit, integer.  Integer vectors use the VINSERTI*
// instructions for both mask widths, matching the 512-bit instantiations
// further down.
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v4i32x_info, v8i32x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v8i16x_info, v16i16x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v16i8x_info, v32i8x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;

// 128-bit into 512-bit, floating point.
defm : vinsert_for_mask_cast<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
                             v16f32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z", v4f32x_info, v16f32_info,
                             v8f64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;

// 128-bit into 512-bit, integer.
defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v4i32x_info, v16i32_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v8i16x_info, v32i16_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v16i8x_info, v64i8_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;

// 256-bit into 512-bit, floating point.
defm : vinsert_for_mask_cast<"VINSERTF32x8Z", v4f64x_info, v8f64_info,
                             v16f32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
                             v8f64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;

// 256-bit into 512-bit, integer.
defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v4i64x_info, v8i64_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v16i16x_info, v32i16_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v32i8x_info, v64i8_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;

// vinsertps - insert f32 to XMM
let ExeDomain = SSEPackedSingle in {
let isCommutable = 1 in
def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
      (ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, timm:$src3))]>,
      EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
// Memory form: the f32 at $src2 is loaded and placed in a v4f32 through
// scalar_to_vector before being fed to X86insertps.
def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
      (ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst, (X86insertps VR128X:$src1,
                          (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
                          timm:$src3))]>,
      EVEX_4V, EVEX_CD8<32, CD8VT1>,
      Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
}

//===----------------------------------------------------------------------===//
// AVX-512 VECTOR EXTRACT
//---

// Supports two different pattern operators for mask and unmasked ops. Allows
// null_frag to be passed for one.
multiclass vextract_for_size_split<int Opcode,
                                   X86VectorVTInfo From, X86VectorVTInfo To,
                                   SDPatternOperator vextract_extract,
                                   SDPatternOperator vextract_for_mask,
                                   SchedWrite SchedRR, SchedWrite SchedMR> {

  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    defm rr : AVX512_maskable_split<Opcode, MRMDestReg, To, (outs To.RC:$dst),
                (ins From.RC:$src1, u8imm:$idx),
                "vextract" # To.EltTypeName # "x" # To.NumElts,
                "$idx, $src1", "$src1, $idx",
                (vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm)),
                (vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm))>,
                AVX512AIi8Base, EVEX, Sched<[SchedRR]>;

    def mr  : AVX512AIi8<Opcode, MRMDestMem, (outs),
                    (ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx),
                    "vextract" # To.EltTypeName # "x" # To.NumElts #
                        "\t{$idx, $src1, $dst|$dst, $src1, $idx}",
                    [(store (To.VT (vextract_extract:$idx
                                    (From.VT From.RC:$src1), (iPTR imm))),
                             addr:$dst)]>, EVEX,
                    Sched<[SchedMR]>;

    let mayStore = 1, hasSideEffects =
0 in 877 def mrk : AVX512AIi8<Opcode, MRMDestMem, (outs), 878 (ins To.MemOp:$dst, To.KRCWM:$mask, 879 From.RC:$src1, u8imm:$idx), 880 "vextract" # To.EltTypeName # "x" # To.NumElts # 881 "\t{$idx, $src1, $dst {${mask}}|" 882 "$dst {${mask}}, $src1, $idx}", []>, 883 EVEX_K, EVEX, Sched<[SchedMR]>, NotMemoryFoldable; 884 } 885} 886 887// Passes the same pattern operator for masked and unmasked ops. 888multiclass vextract_for_size<int Opcode, X86VectorVTInfo From, 889 X86VectorVTInfo To, 890 SDPatternOperator vextract_extract, 891 SchedWrite SchedRR, SchedWrite SchedMR> : 892 vextract_for_size_split<Opcode, From, To, vextract_extract, vextract_extract, SchedRR, SchedMR>; 893 894// Codegen pattern for the alternative types 895multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From, 896 X86VectorVTInfo To, PatFrag vextract_extract, 897 SDNodeXForm EXTRACT_get_vextract_imm, list<Predicate> p> { 898 let Predicates = p in { 899 def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)), 900 (To.VT (!cast<Instruction>(InstrStr#"rr") 901 From.RC:$src1, 902 (EXTRACT_get_vextract_imm To.RC:$ext)))>; 903 def : Pat<(store (To.VT (vextract_extract:$ext (From.VT From.RC:$src1), 904 (iPTR imm))), addr:$dst), 905 (!cast<Instruction>(InstrStr#"mr") addr:$dst, From.RC:$src1, 906 (EXTRACT_get_vextract_imm To.RC:$ext))>; 907 } 908} 909 910multiclass vextract_for_type<ValueType EltVT32, int Opcode128, 911 ValueType EltVT64, int Opcode256, 912 SchedWrite SchedRR, SchedWrite SchedMR> { 913 let Predicates = [HasAVX512] in { 914 defm NAME # "32x4Z" : vextract_for_size<Opcode128, 915 X86VectorVTInfo<16, EltVT32, VR512>, 916 X86VectorVTInfo< 4, EltVT32, VR128X>, 917 vextract128_extract, SchedRR, SchedMR>, 918 EVEX_V512, EVEX_CD8<32, CD8VT4>; 919 defm NAME # "64x4Z" : vextract_for_size<Opcode256, 920 X86VectorVTInfo< 8, EltVT64, VR512>, 921 X86VectorVTInfo< 4, EltVT64, VR256X>, 922 vextract256_extract, SchedRR, SchedMR>, 923 VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>; 
924 } 925 let Predicates = [HasVLX] in 926 defm NAME # "32x4Z256" : vextract_for_size<Opcode128, 927 X86VectorVTInfo< 8, EltVT32, VR256X>, 928 X86VectorVTInfo< 4, EltVT32, VR128X>, 929 vextract128_extract, SchedRR, SchedMR>, 930 EVEX_V256, EVEX_CD8<32, CD8VT4>; 931 932 // Even with DQI we'd like to only use these instructions for masking. 933 let Predicates = [HasVLX, HasDQI] in 934 defm NAME # "64x2Z256" : vextract_for_size_split<Opcode128, 935 X86VectorVTInfo< 4, EltVT64, VR256X>, 936 X86VectorVTInfo< 2, EltVT64, VR128X>, 937 null_frag, vextract128_extract, SchedRR, SchedMR>, 938 VEX_W1X, EVEX_V256, EVEX_CD8<64, CD8VT2>; 939 940 // Even with DQI we'd like to only use these instructions for masking. 941 let Predicates = [HasDQI] in { 942 defm NAME # "64x2Z" : vextract_for_size_split<Opcode128, 943 X86VectorVTInfo< 8, EltVT64, VR512>, 944 X86VectorVTInfo< 2, EltVT64, VR128X>, 945 null_frag, vextract128_extract, SchedRR, SchedMR>, 946 VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>; 947 defm NAME # "32x8Z" : vextract_for_size_split<Opcode256, 948 X86VectorVTInfo<16, EltVT32, VR512>, 949 X86VectorVTInfo< 8, EltVT32, VR256X>, 950 null_frag, vextract256_extract, SchedRR, SchedMR>, 951 EVEX_V512, EVEX_CD8<32, CD8VT8>; 952 } 953} 954 955// TODO - replace WriteFStore/WriteVecStore with X86SchedWriteMoveLSWidths types. 956defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b, WriteFShuffle256, WriteFStore>; 957defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b, WriteShuffle256, WriteVecStore>; 958 959// extract_subvector codegen patterns with the alternative types. 960// Even with AVX512DQ we'll still use these for unmasked operations. 
// 64-bit element sources reuse the 32x4 (128-bit) extract instructions.
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;

// 32-bit element sources reuse the 64x4 (256-bit) extract instructions.
defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;

// 64-bit element 256-bit sources reuse the 32x4 256-bit extract instructions.
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;

// Codegen pattern with the alternative types extract VEC128 from VEC256
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v16f16x_info, v8f16x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasFP16, HasVLX]>;

// Codegen pattern with the alternative types extract VEC128 from VEC512
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v32f16_info, v8f16x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasFP16]>;
// Codegen pattern with the alternative types extract VEC256 from VEC512
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v32f16_info, v16f16x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasFP16]>;


// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
// Each pattern takes the sub_ymm subregister of the 512-bit source and then
// extracts 128-bit lane 1 of it with the VEX-encoded 128-bit extract.
let Predicates = [NoVLX] in {
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
          (v2i64 (VEXTRACTI128rr
                  (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
          (v2f64 (VEXTRACTF128rr
                  (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
          (v4i32 (VEXTRACTI128rr
                  (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
          (v4f32 (VEXTRACTF128rr
                  (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
          (v8i16 (VEXTRACTI128rr
                  (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
          (v16i8 (VEXTRACTI128rr
                  (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
}

// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
// With VLX the 256-bit EVEX extract is used on the sub_ymm subregister of the
// 512-bit source; the element index in the source pattern is the first
// element of bits [255:128] for each element width.
let Predicates = [HasVLX] in {
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
          (v2i64 (VEXTRACTI32x4Z256rr
                  (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
          (v2f64 (VEXTRACTF32x4Z256rr
                  (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
          (v4i32 (VEXTRACTI32x4Z256rr
                  (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
          (v4f32 (VEXTRACTF32x4Z256rr
                  (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
          (v8i16 (VEXTRACTI32x4Z256rr
                  (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
          (v16i8 (VEXTRACTI32x4Z256rr
                  (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
}

// Same lowering for half-precision vectors, guarded by FP16 as well as VLX.
let Predicates = [HasFP16, HasVLX] in
def : Pat<(v8f16 (extract_subvector (v32f16 VR512:$src), (iPTR 8))),
          (v8f16 (VEXTRACTF32x4Z256rr
                  (v16f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;


// Additional patterns for handling a bitcast between the vselect and the
// extract_subvector.
// Patterns for a masked select whose selected value is a bitcast of a
// subvector extract.
//   InstrStr - name prefix of the instruction; "rrk"/"rrkz" is appended.
//   From     - type info of the vector being extracted from ($src).
//   To       - type info of the extracted subvector.
//   Cast     - type info of the select; supplies the mask class (KRCWM), the
//              passthrough register class and the all-zeros vector.
//   vextract_extract / EXTRACT_get_vextract_imm - extract fragment and the
//              transform that produces the instruction immediate from $ext.
//   p        - predicates guarding both patterns.
multiclass vextract_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                  X86VectorVTInfo To, X86VectorVTInfo Cast,
                                  PatFrag vextract_extract,
                                  SDNodeXForm EXTRACT_get_vextract_imm,
                                  list<Predicate> p> {
let Predicates = p in {
  // Passthrough $src0. The passthrough is named with Cast.RC on both sides of
  // the pattern, as in vinsert_for_mask_cast.
  def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask,
                                   (bitconvert
                                    (To.VT (vextract_extract:$ext
                                            (From.VT From.RC:$src), (iPTR imm)))),
                                   Cast.RC:$src0)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
                      Cast.RC:$src0, Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;

  // Unselected elements are zero.
  def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask,
                                   (bitconvert
                                    (To.VT (vextract_extract:$ext
                                            (From.VT From.RC:$src), (iPTR imm)))),
                                   Cast.ImmAllZerosV)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
                      Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;
}
}

// 128-bit extract from 256-bit, floating point.
defm : vextract_for_mask_cast<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
                              v4f32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z256", v8f32x_info, v4f32x_info,
                              v2f64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;

// 128-bit extract from 256-bit, integer.
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v8i32x_info, v4i32x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v16i16x_info, v8i16x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v32i8x_info, v16i8x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;

// 128-bit extract from 512-bit, floating point.
defm : vextract_for_mask_cast<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
                              v4f32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z", v16f32_info, v4f32x_info,
                              v2f64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;

// 128-bit extract from 512-bit, integer.
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v16i32_info, v4i32x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v32i16_info, v8i16x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v64i8_info, v16i8x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;

// 256-bit extract from 512-bit, floating point.
defm : vextract_for_mask_cast<"VEXTRACTF32x8Z", v8f64_info, v4f64x_info,
                              v8f32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
                              v4f64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;

// 256-bit extract from 512-bit, integer.
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v8i64_info, v4i64x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v32i16_info, v16i16x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v64i8_info, v32i8x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;

// vextractps - extract 32 bits from XMM
def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32orGR64:$dst),
      (ins VR128X:$src1, u8imm:$src2),
      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      [(set GR32orGR64:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
      EVEX, VEX_WIG, Sched<[WriteVecExtract]>;

def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
      (ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
                          addr:$dst)]>,
      EVEX, VEX_WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecExtractSt]>;

//===---------------------------------------------------------------------===//
// AVX-512 BROADCAST
//---
// broadcast with a scalar argument.
// Selects the Name#ZSuffix "rr"/"rrk"/"rrkz" instructions for a broadcast of
// a scalar held in SrcInfo.FRC; the scalar is first copied into SrcInfo.RC.
multiclass avx512_broadcast_scalar<string Name, X86VectorVTInfo DestInfo,
                                   X86VectorVTInfo SrcInfo> {
  // Unmasked.
  def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#rr)
             (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
  // Masked with passthrough $src0.
  def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask,
                                       (X86VBroadcast SrcInfo.FRC:$src),
                                       DestInfo.RC:$src0)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#rrk)
             DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
             (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
  // Masked with zeroing.
  def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask,
                                       (X86VBroadcast SrcInfo.FRC:$src),
                                       DestInfo.ImmAllZerosV)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#rrkz)
             DestInfo.KRCWM:$mask, (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
}

// Split version to allow mask and broadcast node to be different types. This
// helps support the 32x2 broadcasts.
// Defines six records: register-source "rr"/"rrkz"/"rrk" and memory-source
// "rm"/"rmkz"/"rmk". Only the unmasked forms use UnmaskedOp and
// UnmaskedBcastOp; the masked forms always match X86VBroadcast and
// SrcInfo.BroadcastLdFrag.
multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
                                     SchedWrite SchedRR, SchedWrite SchedRM,
                                     X86VectorVTInfo MaskInfo,
                                     X86VectorVTInfo DestInfo,
                                     X86VectorVTInfo SrcInfo,
                                     bit IsConvertibleToThreeAddress,
                                     SDPatternOperator UnmaskedOp = X86VBroadcast,
                                     SDPatternOperator UnmaskedBcastOp = SrcInfo.BroadcastLdFrag> {
  // Register source, unmasked.
  let hasSideEffects = 0 in
  def rr : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst), (ins SrcInfo.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set MaskInfo.RC:$dst,
                      (MaskInfo.VT
                       (bitconvert
                        (DestInfo.VT
                         (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))))],
                    DestInfo.ExeDomain>, T8PD, EVEX, Sched<[SchedRR]>;
  // Register source, zero-masked.
  def rrkz : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
                      (ins MaskInfo.KRCWM:$mask, SrcInfo.RC:$src),
                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
                       "${dst} {${mask}} {z}, $src}"),
                       [(set MaskInfo.RC:$dst,
                         (vselect_mask MaskInfo.KRCWM:$mask,
                          (MaskInfo.VT
                           (bitconvert
                            (DestInfo.VT
                             (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
                          MaskInfo.ImmAllZerosV))],
                       DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ, Sched<[SchedRR]>;
  // Register source, masked; the passthrough $src0 is tied to $dst.
  let Constraints = "$src0 = $dst" in
  def rrk : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
                     (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
                          SrcInfo.RC:$src),
                     !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
                     "${dst} {${mask}}, $src}"),
                     [(set MaskInfo.RC:$dst,
                       (vselect_mask MaskInfo.KRCWM:$mask,
                        (MaskInfo.VT
                         (bitconvert
                          (DestInfo.VT
                           (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
                        MaskInfo.RC:$src0))],
                      DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K, Sched<[SchedRR]>;

  // Memory source, unmasked.
  let hasSideEffects = 0, mayLoad = 1 in
  def rm : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
                    (ins SrcInfo.ScalarMemOp:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set MaskInfo.RC:$dst,
                      (MaskInfo.VT
                       (bitconvert
                        (DestInfo.VT
                         (UnmaskedBcastOp addr:$src)))))],
                    DestInfo.ExeDomain>, T8PD, EVEX,
                    EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;

  // Memory source, zero-masked.
  def rmkz : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
                      (ins MaskInfo.KRCWM:$mask, SrcInfo.ScalarMemOp:$src),
                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
                       "${dst} {${mask}} {z}, $src}"),
                       [(set MaskInfo.RC:$dst,
                         (vselect_mask MaskInfo.KRCWM:$mask,
                          (MaskInfo.VT
                           (bitconvert
                            (DestInfo.VT
                             (SrcInfo.BroadcastLdFrag addr:$src)))),
                          MaskInfo.ImmAllZerosV))],
                       DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ,
                       EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;

  // Memory source, masked; the passthrough $src0 is tied to $dst.
  let Constraints = "$src0 = $dst",
      isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
  def rmk : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
                     (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
                          SrcInfo.ScalarMemOp:$src),
                     !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
                     "${dst} {${mask}}, $src}"),
                     [(set MaskInfo.RC:$dst,
                       (vselect_mask MaskInfo.KRCWM:$mask,
                        (MaskInfo.VT
                         (bitconvert
                          (DestInfo.VT
                           (SrcInfo.BroadcastLdFrag addr:$src)))),
                        MaskInfo.RC:$src0))],
                      DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K,
                      EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
}

// Helper class to force mask and broadcast result to same type.
multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr,
                               SchedWrite SchedRR, SchedWrite SchedRM,
                               X86VectorVTInfo DestInfo,
                               X86VectorVTInfo SrcInfo,
                               bit IsConvertibleToThreeAddress> :
  avx512_broadcast_rm_split<opc, OpcodeStr, SchedRR, SchedRM,
                            DestInfo, DestInfo, SrcInfo,
                            IsConvertibleToThreeAddress>;

// Broadcast from a 128-bit source to 512 and 256 bits; this variant defines
// no 128-bit destination form.
multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
                                  WriteFShuffle256Ld, _.info512, _.info128, 1>,
              avx512_broadcast_scalar<NAME, _.info512, _.info128>,
              EVEX_V512;
  }

  let Predicates = [HasVLX] in {
    defm Z256  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
                                     WriteFShuffle256Ld, _.info256, _.info128, 1>,
                 avx512_broadcast_scalar<NAME, _.info256, _.info128>,
                 EVEX_V256;
  }
}

// Broadcast from a 128-bit source to 512, 256 and 128 bits.
multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
                                  WriteFShuffle256Ld, _.info512, _.info128, 1>,
              avx512_broadcast_scalar<NAME, _.info512, _.info128>,
              EVEX_V512;
  }

  let Predicates = [HasVLX] in {
    defm Z256  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
                                     WriteFShuffle256Ld, _.info256, _.info128, 1>,
                 avx512_broadcast_scalar<NAME, _.info256, _.info128>,
                 EVEX_V256;
    defm Z128  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
                                     WriteFShuffle256Ld, _.info128, _.info128, 1>,
                 avx512_broadcast_scalar<NAME, _.info128, _.info128>,
                 EVEX_V128;
  }
}
defm VBROADCASTSS  : avx512_fp_broadcast_ss<0x18, "vbroadcastss",
                                       avx512vl_f32_info>;
defm VBROADCASTSD  : avx512_fp_broadcast_sd<0x19, "vbroadcastsd",
                                       avx512vl_f64_info>, VEX_W1X;

// Broadcast from a general-purpose register of class SrcRC; the masked forms
// are matched with plain vselect.
multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR,
                                    X86VectorVTInfo _, SDPatternOperator OpNode,
                                    RegisterClass SrcRC> {
  // Fold with a mask even if it has multiple uses since it is cheap.
  let ExeDomain = _.ExeDomain in
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                          (ins SrcRC:$src),
                          "vpbroadcast"#_.Suffix, "$src", "$src",
                          (_.VT (OpNode SrcRC:$src)), /*IsCommutable*/0,
                          /*IsKCommutable*/0, /*IsKZCommutable*/0, vselect>,
                          T8PD, EVEX, Sched<[SchedRR]>;
}

// Byte/word broadcast from a general-purpose register. The instruction takes
// a GR32 operand, so the patterns place the SrcRC value into an undefined
// 32-bit register at subregister index Subreg.
multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite SchedRR,
                                      X86VectorVTInfo _, SDPatternOperator OpNode,
                                      RegisterClass SrcRC, SubRegIndex Subreg> {
  // The instruction forms carry no patterns; selection is done by the
  // explicit Pats below.
  let hasSideEffects = 0, ExeDomain = _.ExeDomain in
  defm rr : AVX512_maskable_custom<opc, MRMSrcReg,
                         (outs _.RC:$dst), (ins GR32:$src),
                         !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
                         !con((ins _.KRCWM:$mask), (ins GR32:$src)),
                         "vpbroadcast"#_.Suffix, "$src", "$src", [], [], [],
                         "$src0 = $dst">, T8PD, EVEX, Sched<[SchedRR]>;

  def : Pat <(_.VT (OpNode SrcRC:$src)),
             (!cast<Instruction>(Name#rr)
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;

  // Fold with a mask even if it has multiple uses since it is cheap.
  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
             (!cast<Instruction>(Name#rrk) _.RC:$src0, _.KRCWM:$mask,
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;

  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV),
             (!cast<Instruction>(Name#rrkz) _.KRCWM:$mask,
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
}

// 512/256/128-bit instantiations of avx512_int_broadcastbw_reg; the narrower
// two additionally require VLX.
multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name,
                      AVX512VLVectorVTInfo _, SDPatternOperator OpNode,
                      RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, WriteShuffle256, _.info512,
              OpNode, SrcRC, Subreg>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, WriteShuffle256,
              _.info256, OpNode, SrcRC, Subreg>, EVEX_V256;
    defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, WriteShuffle,
              _.info128, OpNode, SrcRC, Subreg>, EVEX_V128;
  }
}

// 512/256/128-bit instantiations of avx512_int_broadcast_reg; the narrower
// two additionally require VLX.
multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
                                       SDPatternOperator OpNode,
                                       RegisterClass SrcRC, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info512, OpNode,
                                      SrcRC>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info256, OpNode,
                                         SrcRC>, EVEX_V256;
    defm Z128 : avx512_int_broadcast_reg<opc, WriteShuffle, _.info128, OpNode,
                                         SrcRC>, EVEX_V128;
  }
}

defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr",
                       avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>;
defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr",
                       avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit,
                       HasBWI>;
defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
                                                 X86VBroadcast, GR32, HasAVX512>;
defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
                                                 X86VBroadcast, GR64, HasAVX512>, VEX_W;

// Integer broadcast from a vector register or memory, instantiated at 512,
// 256 and 128 bits; the narrower two additionally require VLX.
multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
                                      AVX512VLVectorVTInfo _, Predicate prd,
                                      bit IsConvertibleToThreeAddress> {
  let Predicates = [prd] in {
    defm Z :   avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle256,
                                   WriteShuffle256Ld, _.info512, _.info128,
                                   IsConvertibleToThreeAddress>,
                                  EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle256,
                                    WriteShuffle256Ld, _.info256, _.info128,
                                    IsConvertibleToThreeAddress>,
                                 EVEX_V256;
    defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle,
                                    WriteShuffleXLd, _.info128, _.info128,
                                    IsConvertibleToThreeAddress>,
                                 EVEX_V128;
  }
}

defm VPBROADCASTB  : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
                                           avx512vl_i8_info, HasBWI, 0>;
defm VPBROADCASTW  : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
                                           avx512vl_i16_info, HasBWI, 0>;
defm VPBROADCASTD  : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
                                           avx512vl_i32_info, HasAVX512, 1>;
defm VPBROADCASTQ  : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
                                           avx512vl_i64_info, HasAVX512, 1>, VEX_W1X;

// Subvector broadcast from memory: a single maskable "rm" form that loads a
// _Src-sized operand and produces _Dst.VT via OpNode.
multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
                                      SDPatternOperator OpNode,
                                      X86VectorVTInfo _Dst,
                                      X86VectorVTInfo _Src> {
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                           (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
                           (_Dst.VT (OpNode addr:$src))>,
                           Sched<[SchedWriteShuffle.YMM.Folded]>,
                           AVX5128IBase, EVEX;
}

// This should be used for the AVX512DQ broadcast instructions. It disables
// the unmasked patterns so that we only use the DQ instructions when masking
// is requested.
multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
                                         SDPatternOperator OpNode,
                                         X86VectorVTInfo _Dst,
                                         X86VectorVTInfo _Src> {
  // Built on AVX512_maskable_split: null_frag fills the first pattern slot and
  // the OpNode load pattern fills the second. hasSideEffects/mayLoad are set
  // explicitly on the defm.
  let hasSideEffects = 0, mayLoad = 1 in
  defm rm : AVX512_maskable_split<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
      (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
      (null_frag),
      (_Dst.VT (OpNode addr:$src))>,
      Sched<[SchedWriteShuffle.YMM.Folded]>,
      AVX5128IBase, EVEX;
}

// f16 element broadcasts are selected to the VPBROADCASTW instructions.
// 512-bit results need only HasFP16.
let Predicates = [HasFP16] in {
  def : Pat<(v32f16 (X86VBroadcastld16 addr:$src)),
            (VPBROADCASTWZrm addr:$src)>;

  def : Pat<(v32f16 (X86VBroadcast (v8f16 VR128X:$src))),
            (VPBROADCASTWZrr VR128X:$src)>;
  // A scalar FR16X source is first copied into VR128X.
  def : Pat<(v32f16 (X86VBroadcast (f16 FR16X:$src))),
            (VPBROADCASTWZrr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
}
// 128/256-bit results additionally need HasVLX.
let Predicates = [HasVLX, HasFP16] in {
  def : Pat<(v8f16 (X86VBroadcastld16 addr:$src)),
            (VPBROADCASTWZ128rm addr:$src)>;
  def : Pat<(v16f16 (X86VBroadcastld16 addr:$src)),
            (VPBROADCASTWZ256rm addr:$src)>;

  def : Pat<(v8f16 (X86VBroadcast (v8f16 VR128X:$src))),
            (VPBROADCASTWZ128rr VR128X:$src)>;
  def : Pat<(v16f16 (X86VBroadcast (v8f16 VR128X:$src))),
            (VPBROADCASTWZ256rr VR128X:$src)>;

  def : Pat<(v8f16 (X86VBroadcast (f16 FR16X:$src))),
            (VPBROADCASTWZ128rr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
  def : Pat<(v16f16 (X86VBroadcast (f16 FR16X:$src))),
            (VPBROADCASTWZ256rr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
}

//===----------------------------------------------------------------------===//
// AVX-512 BROADCAST SUBVECTORS
//

// 512-bit destinations: the 32X4 forms use X86SubVBroadcastld128, the 64X4
// forms use X86SubVBroadcastld256.
defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
    X86SubVBroadcastld128, v16i32_info, v4i32x_info>,
    EVEX_V512, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
    X86SubVBroadcastld128, v16f32_info, v4f32x_info>,
    EVEX_V512, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
    X86SubVBroadcastld256, v8i64_info, v4i64x_info>, VEX_W,
    EVEX_V512, EVEX_CD8<64, CD8VT4>;
defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
    X86SubVBroadcastld256, v8f64_info, v4f64x_info>, VEX_W,
    EVEX_V512, EVEX_CD8<64, CD8VT4>;

let Predicates = [HasAVX512] in {
// Unmasked 256-bit subvector broadcasts: FP result types go to
// VBROADCASTF64X4rm, integer result types to VBROADCASTI64X4rm.
def : Pat<(v8f64 (X86SubVBroadcastld256 addr:$src)),
          (VBROADCASTF64X4rm addr:$src)>;
def : Pat<(v16f32 (X86SubVBroadcastld256 addr:$src)),
          (VBROADCASTF64X4rm addr:$src)>;
def : Pat<(v32f16 (X86SubVBroadcastld256 addr:$src)),
          (VBROADCASTF64X4rm addr:$src)>;
def : Pat<(v8i64 (X86SubVBroadcastld256 addr:$src)),
          (VBROADCASTI64X4rm addr:$src)>;
def : Pat<(v16i32 (X86SubVBroadcastld256 addr:$src)),
          (VBROADCASTI64X4rm addr:$src)>;
def : Pat<(v32i16 (X86SubVBroadcastld256 addr:$src)),
          (VBROADCASTI64X4rm addr:$src)>;
def : Pat<(v64i8 (X86SubVBroadcastld256 addr:$src)),
          (VBROADCASTI64X4rm addr:$src)>;

// Unmasked 128-bit subvector broadcasts: FP result types go to
// VBROADCASTF32X4rm, integer result types to VBROADCASTI32X4rm.
def : Pat<(v8f64 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF32X4rm addr:$src)>;
def : Pat<(v16f32 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF32X4rm addr:$src)>;
def : Pat<(v32f16 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF32X4rm addr:$src)>;
def : Pat<(v8i64 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4rm addr:$src)>;
def : Pat<(v16i32 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4rm addr:$src)>;
def : Pat<(v32i16 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4rm addr:$src)>;
def : Pat<(v64i8 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4rm addr:$src)>;

// Patterns for selects of bitcasted operations.
// 16 x 32-bit lanes selected over a broadcast that was formed as v8f64/v8i64.
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))),
                        (v16f32 immAllZerosV)),
          (VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))),
                        VR512:$src0),
          (VBROADCASTF32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))),
                        (v16i32 immAllZerosV)),
          (VBROADCASTI32X4rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))),
                        VR512:$src0),
          (VBROADCASTI32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;

// 8 x 64-bit lanes selected over a broadcast that was formed as v16f32/v16i32.
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))),
                        (v8f64 immAllZerosV)),
          (VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))),
                        VR512:$src0),
          (VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))),
                        (v8i64 immAllZerosV)),
          (VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))),
                        VR512:$src0),
          (VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}

// 256-bit destinations of the 128-bit subvector broadcast (HasVLX).
let Predicates = [HasVLX] in {
defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
    X86SubVBroadcastld128, v8i32x_info, v4i32x_info>,
    EVEX_V256, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
    X86SubVBroadcastld128, v8f32x_info, v4f32x_info>,
    EVEX_V256, EVEX_CD8<32, CD8VT4>;

// Unmasked forms for the remaining 256-bit result types: FP types go to the
// F32X4 instruction, integer types to the I32X4 instruction.
def : Pat<(v4f64 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF32X4Z256rm addr:$src)>;
def : Pat<(v8f32 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF32X4Z256rm addr:$src)>;
def : Pat<(v16f16 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF32X4Z256rm addr:$src)>;
def : Pat<(v4i64 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4Z256rm addr:$src)>;
def : Pat<(v8i32 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4Z256rm addr:$src)>;
def : Pat<(v16i16 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4Z256rm addr:$src)>;
def : Pat<(v32i8 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4Z256rm addr:$src)>;

// Patterns for selects of bitcasted operations.
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8f32 (v4f64 (X86SubVBroadcastld128 addr:$src))),
                        (v8f32 immAllZerosV)),
          (VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8f32 (v4f64 (X86SubVBroadcastld128 addr:$src))),
                        VR256X:$src0),
          (VBROADCASTF32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8i32 (v4i64 (X86SubVBroadcastld128 addr:$src))),
                        (v8i32 immAllZerosV)),
          (VBROADCASTI32X4Z256rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8i32 (v4i64 (X86SubVBroadcastld128 addr:$src))),
                        VR256X:$src0),
          (VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
}

// 64X2 broadcasts into 256-bit destinations (HasVLX + HasDQI). Note the
// records carry a Z128 suffix while being encoded with EVEX_V256.
let Predicates = [HasVLX, HasDQI] in {
defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
    X86SubVBroadcastld128, v4i64x_info, v2i64x_info>, VEX_W1X,
    EVEX_V256, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
    X86SubVBroadcastld128, v4f64x_info, v2f64x_info>, VEX_W1X,
    EVEX_V256, EVEX_CD8<64, CD8VT2>;

// Patterns for selects of bitcasted operations.
// 4 x 64-bit lanes selected over a broadcast that was formed as v8f32/v8i32.
def : Pat<(vselect_mask VK4WM:$mask,
                        (bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))),
                        (v4f64 immAllZerosV)),
          (VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK4WM:$mask,
                        (bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))),
                        VR256X:$src0),
          (VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK4WM:$mask,
                        (bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))),
                        (v4i64 immAllZerosV)),
          (VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK4WM:$mask,
                        (bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))),
                        VR256X:$src0),
          (VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
}

// 512-bit DQ subvector broadcasts: 64X2 forms use X86SubVBroadcastld128,
// 32X8 forms use X86SubVBroadcastld256.
let Predicates = [HasDQI] in {
defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
    X86SubVBroadcastld128, v8i64_info, v2i64x_info>, VEX_W,
    EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
    X86SubVBroadcastld256, v16i32_info, v8i32x_info>,
    EVEX_V512, EVEX_CD8<32, CD8VT8>;
defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
    X86SubVBroadcastld128, v8f64_info, v2f64x_info>, VEX_W,
    EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
    X86SubVBroadcastld256, v16f32_info, v8f32x_info>,
    EVEX_V512, EVEX_CD8<32, CD8VT8>;

// Patterns for selects of bitcasted operations.
// 16 x 32-bit lanes selected over a 256-bit broadcast formed as v8f64/v8i64.
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))),
                        (v16f32 immAllZerosV)),
          (VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))),
                        VR512:$src0),
          (VBROADCASTF32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))),
                        (v16i32 immAllZerosV)),
          (VBROADCASTI32X8rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))),
                        VR512:$src0),
          (VBROADCASTI32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;

// 8 x 64-bit lanes selected over a 128-bit broadcast formed as v16f32/v16i32.
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))),
                        (v8f64 immAllZerosV)),
          (VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))),
                        VR512:$src0),
          (VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))),
                        (v8i64 immAllZerosV)),
          (VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))),
                        VR512:$src0),
          (VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}

// 32x2 broadcasts at 512 bits (HasDQI) and 256 bits (HasDQI + HasVLX).
// Both instantiations pass _Src.info128 as the third VT-info argument and
// null_frag for both trailing pattern-operator arguments.
multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
                                        AVX512VLVectorVTInfo _Dst,
                                        AVX512VLVectorVTInfo _Src> {
  let Predicates = [HasDQI] in
    defm Z : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle256,
        WriteShuffle256Ld, _Dst.info512,
        _Src.info512, _Src.info128, 0, null_frag, null_frag>,
        EVEX_V512;
  let Predicates = [HasDQI, HasVLX] in
    defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle256,
        WriteShuffle256Ld, _Dst.info256,
        _Src.info256, _Src.info128, 0, null_frag, null_frag>,
        EVEX_V256;
}

// Inherits the Z and Z256 records from avx512_common_broadcast_32x2 and adds
// a 128-bit Z128 record.
multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
                                         AVX512VLVectorVTInfo _Dst,
                                         AVX512VLVectorVTInfo _Src> :
    avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> {

  let Predicates = [HasDQI, HasVLX] in
    defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle,
        WriteShuffleXLd, _Dst.info128,
        _Src.info128, _Src.info128, 0, null_frag, null_frag>,
        EVEX_V128;
}

// Only the integer variant gets the extra 128-bit record.
defm VBROADCASTI32X2 : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
    avx512vl_i32_info, avx512vl_i64_info>;
defm VBROADCASTF32X2 : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
    avx512vl_f32_info, avx512vl_f64_info>;

//===----------------------------------------------------------------------===//
// AVX-512 BROADCAST MASK TO VECTOR REGISTER
//---
// Register-only form: takes a KRC mask register and selects
// (_.VT (X86VBroadcastm KRC:$src)) into _.RC.
multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr,
                                  X86VectorVTInfo _, RegisterClass KRC> {
  def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.RC:$dst), (ins KRC:$src),
      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
      [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))]>,
      EVEX, Sched<[WriteShuffle]>;
}

// Width expansion of avx512_mask_broadcastm: Z under HasCDI, Z256/Z128 under
// HasCDI + HasVLX.
multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
                                 AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> {
  let Predicates = [HasCDI] in
    defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>, EVEX_V512;
  let Predicates = [HasCDI, HasVLX] in {
    defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>, EVEX_V256;
    defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>, EVEX_V128;
  }
}

defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
    avx512vl_i32_info, VK16>;
defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
    avx512vl_i64_info, VK8>, VEX_W;
1767 1768//===----------------------------------------------------------------------===// 1769// -- VPERMI2 - 3 source operands form -- 1770multiclass avx512_perm_i<bits<8> opc, string OpcodeStr, 1771 X86FoldableSchedWrite sched, 1772 X86VectorVTInfo _, X86VectorVTInfo IdxVT> { 1773let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, 1774 hasSideEffects = 0 in { 1775 defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst), 1776 (ins _.RC:$src2, _.RC:$src3), 1777 OpcodeStr, "$src3, $src2", "$src2, $src3", 1778 (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, _.RC:$src3)), 1>, 1779 EVEX_4V, AVX5128IBase, Sched<[sched]>; 1780 1781 let mayLoad = 1 in 1782 defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst), 1783 (ins _.RC:$src2, _.MemOp:$src3), 1784 OpcodeStr, "$src3, $src2", "$src2, $src3", 1785 (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, 1786 (_.VT (_.LdFrag addr:$src3)))), 1>, 1787 EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>; 1788 } 1789} 1790 1791multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr, 1792 X86FoldableSchedWrite sched, 1793 X86VectorVTInfo _, X86VectorVTInfo IdxVT> { 1794 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, 1795 hasSideEffects = 0, mayLoad = 1 in 1796 defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst), 1797 (ins _.RC:$src2, _.ScalarMemOp:$src3), 1798 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), 1799 !strconcat("$src2, ${src3}", _.BroadcastStr ), 1800 (_.VT (X86VPermt2 _.RC:$src2, 1801 IdxVT.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>, 1802 AVX5128IBase, EVEX_4V, EVEX_B, 1803 Sched<[sched.Folded, sched.ReadAfterFold]>; 1804} 1805 1806multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr, 1807 X86FoldableSchedWrite sched, 1808 AVX512VLVectorVTInfo VTInfo, 1809 AVX512VLVectorVTInfo ShuffleMask> { 1810 defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512, 1811 ShuffleMask.info512>, 1812 
avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info512, 1813 ShuffleMask.info512>, EVEX_V512; 1814 let Predicates = [HasVLX] in { 1815 defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128, 1816 ShuffleMask.info128>, 1817 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info128, 1818 ShuffleMask.info128>, EVEX_V128; 1819 defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256, 1820 ShuffleMask.info256>, 1821 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info256, 1822 ShuffleMask.info256>, EVEX_V256; 1823 } 1824} 1825 1826multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr, 1827 X86FoldableSchedWrite sched, 1828 AVX512VLVectorVTInfo VTInfo, 1829 AVX512VLVectorVTInfo Idx, 1830 Predicate Prd> { 1831 let Predicates = [Prd] in 1832 defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512, 1833 Idx.info512>, EVEX_V512; 1834 let Predicates = [Prd, HasVLX] in { 1835 defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128, 1836 Idx.info128>, EVEX_V128; 1837 defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256, 1838 Idx.info256>, EVEX_V256; 1839 } 1840} 1841 1842defm VPERMI2D : avx512_perm_i_sizes<0x76, "vpermi2d", WriteVarShuffle256, 1843 avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; 1844defm VPERMI2Q : avx512_perm_i_sizes<0x76, "vpermi2q", WriteVarShuffle256, 1845 avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>; 1846defm VPERMI2W : avx512_perm_i_sizes_bw<0x75, "vpermi2w", WriteVarShuffle256, 1847 avx512vl_i16_info, avx512vl_i16_info, HasBWI>, 1848 VEX_W, EVEX_CD8<16, CD8VF>; 1849defm VPERMI2B : avx512_perm_i_sizes_bw<0x75, "vpermi2b", WriteVarShuffle256, 1850 avx512vl_i8_info, avx512vl_i8_info, HasVBMI>, 1851 EVEX_CD8<8, CD8VF>; 1852defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", WriteFVarShuffle256, 1853 avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; 1854defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", WriteFVarShuffle256, 1855 avx512vl_f64_info, 
avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>; 1856 1857// Extra patterns to deal with extra bitcasts due to passthru and index being 1858// different types on the fp versions. 1859multiclass avx512_perm_i_lowering<string InstrStr, X86VectorVTInfo _, 1860 X86VectorVTInfo IdxVT, 1861 X86VectorVTInfo CastVT> { 1862 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 1863 (X86VPermt2 (_.VT _.RC:$src2), 1864 (IdxVT.VT (bitconvert 1865 (CastVT.VT _.RC:$src1))), 1866 _.RC:$src3), 1867 (_.VT (bitconvert (CastVT.VT _.RC:$src1))))), 1868 (!cast<Instruction>(InstrStr#"rrk") _.RC:$src1, _.KRCWM:$mask, 1869 _.RC:$src2, _.RC:$src3)>; 1870 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 1871 (X86VPermt2 _.RC:$src2, 1872 (IdxVT.VT (bitconvert 1873 (CastVT.VT _.RC:$src1))), 1874 (_.LdFrag addr:$src3)), 1875 (_.VT (bitconvert (CastVT.VT _.RC:$src1))))), 1876 (!cast<Instruction>(InstrStr#"rmk") _.RC:$src1, _.KRCWM:$mask, 1877 _.RC:$src2, addr:$src3)>; 1878 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 1879 (X86VPermt2 _.RC:$src2, 1880 (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))), 1881 (_.BroadcastLdFrag addr:$src3)), 1882 (_.VT (bitconvert (CastVT.VT _.RC:$src1))))), 1883 (!cast<Instruction>(InstrStr#"rmbk") _.RC:$src1, _.KRCWM:$mask, 1884 _.RC:$src2, addr:$src3)>; 1885} 1886 1887// TODO: Should we add more casts? The vXi64 case is common due to ABI. 
defm : avx512_perm_i_lowering<"VPERMI2PS", v16f32_info, v16i32_info, v8i64_info>;
defm : avx512_perm_i_lowering<"VPERMI2PS256", v8f32x_info, v8i32x_info, v4i64x_info>;
defm : avx512_perm_i_lowering<"VPERMI2PS128", v4f32x_info, v4i32x_info, v2i64x_info>;

// VPERMT2
// Register (rr) and full-vector memory (rm) forms. $src1 is tied to $dst and
// is the first X86VPermt2 operand; the index is IdxVT.RC:$src2.
multiclass avx512_perm_t<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched,
                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
  defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
      (ins IdxVT.RC:$src2, _.RC:$src3),
      OpcodeStr, "$src3, $src2", "$src2, $src3",
      (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)), 1>,
      EVEX_4V, AVX5128IBase, Sched<[sched]>;

  defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
      (ins IdxVT.RC:$src2, _.MemOp:$src3),
      OpcodeStr, "$src3, $src2", "$src2, $src3",
      (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
             (_.LdFrag addr:$src3))), 1>,
      EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
// Embedded-broadcast memory form (rmb) of VPERMT2; the record carries EVEX_B.
multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched,
                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
  defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
      (ins IdxVT.RC:$src2, _.ScalarMemOp:$src3),
      OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
      !strconcat("$src2, ${src3}", _.BroadcastStr ),
      (_.VT (X86VPermt2 _.RC:$src1,
             IdxVT.RC:$src2,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
      AVX5128IBase, EVEX_4V, EVEX_B,
      Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// Width expansion with rr/rm/rmb forms; 128/256-bit records need HasVLX.
multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr,
                               X86FoldableSchedWrite sched,
                               AVX512VLVectorVTInfo VTInfo,
                               AVX512VLVectorVTInfo ShuffleMask> {
  defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
                           ShuffleMask.info512>,
             avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info512,
                              ShuffleMask.info512>, EVEX_V512;
  let Predicates = [HasVLX] in {
    defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
                                 ShuffleMask.info128>,
                   avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info128,
                                    ShuffleMask.info128>, EVEX_V128;
    defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
                                 ShuffleMask.info256>,
                   avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info256,
                                    ShuffleMask.info256>, EVEX_V256;
  }
}

// Width expansion without the broadcast form, gated on Prd (plus HasVLX for
// the 128/256-bit records).
multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched,
                                  AVX512VLVectorVTInfo VTInfo,
                                  AVX512VLVectorVTInfo Idx, Predicate Prd> {
  let Predicates = [Prd] in
    defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
                             Idx.info512>, EVEX_V512;
  let Predicates = [Prd, HasVLX] in {
    defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
                                 Idx.info128>, EVEX_V128;
    defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
                                 Idx.info256>, EVEX_V256;
  }
}

defm VPERMT2D : avx512_perm_t_sizes<0x7E, "vpermt2d", WriteVarShuffle256,
    avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMT2Q : avx512_perm_t_sizes<0x7E, "vpermt2q", WriteVarShuffle256,
    avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMT2W : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", WriteVarShuffle256,
    avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
    VEX_W, EVEX_CD8<16, CD8VF>;
defm VPERMT2B : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", WriteVarShuffle256,
    avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
    EVEX_CD8<8, CD8VF>;
defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", WriteFVarShuffle256,
    avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", WriteFVarShuffle256,
    avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;

//===----------------------------------------------------------------------===//
// AVX-512 - BLEND using mask
//

// Register and full-vector memory blend forms, each in unmasked, merge-masked
// (k) and zero-masked (kz) flavours. All six records have an empty pattern
// list; the kz records are additionally marked NotMemoryFoldable.
multiclass WriteFVarBlendask<bits<8> opc, string OpcodeStr,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
      (ins _.RC:$src1, _.RC:$src2),
      !strconcat(OpcodeStr,
                 "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), []>,
      EVEX_4V, Sched<[sched]>;
  def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
      (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
      !strconcat(OpcodeStr,
                 "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
      []>, EVEX_4V, EVEX_K, Sched<[sched]>;
  def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
      (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
      !strconcat(OpcodeStr,
                 "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
      []>, EVEX_4V, EVEX_KZ, Sched<[sched]>, NotMemoryFoldable;
  let mayLoad = 1 in {
  def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
      (ins _.RC:$src1, _.MemOp:$src2),
      !strconcat(OpcodeStr,
                 "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
      []>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
      Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
      (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
      !strconcat(OpcodeStr,
                 "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
      []>, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>,
      Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
      (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
      !strconcat(OpcodeStr,
                 "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
      []>, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>,
      Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
  }
  }
}
// Embedded-broadcast memory blend forms (rmbk, rmbkz, rmb). All take a
// _.ScalarMemOp source, carry EVEX_B and have an empty pattern list.
multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in {
  def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
      (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
      !strconcat(OpcodeStr,
                 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
                 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
      EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
      Sched<[sched.Folded, sched.ReadAfterFold]>;

  def rmbkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
      (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
      !strconcat(OpcodeStr,
                 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}} {z}|",
                 "$dst {${mask}} {z}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
      EVEX_4V, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
      Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;

  def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
      (ins _.RC:$src1, _.ScalarMemOp:$src2),
      !strconcat(OpcodeStr,
                 "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
                 "$dst, $src1, ${src2}", _.BroadcastStr, "}"), []>,
      EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
      Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Dword/qword blends: plain and broadcast forms at all three widths. The
// 512-bit records get no predicate here; 128/256-bit records need HasVLX.
multiclass blendmask_dq<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
                        AVX512VLVectorVTInfo VTInfo> {
  defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
           WriteFVarBlendask_rmb<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
           EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
                WriteFVarBlendask_rmb<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
                WriteFVarBlendask_rmb<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
                EVEX_V128;
  }
}

// Byte/word blends: no broadcast forms, gated on HasBWI (plus HasVLX for the
// 128/256-bit records).
multiclass blendmask_bw<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
                        AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasBWI] in
    defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
             EVEX_V512;

  let Predicates = [HasBWI, HasVLX] in {
    defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
                EVEX_V128;
  }
}

defm VBLENDMPS : blendmask_dq<0x65, "vblendmps", SchedWriteFVarBlend,
    avx512vl_f32_info>;
defm VBLENDMPD : blendmask_dq<0x65, "vblendmpd", SchedWriteFVarBlend,
    avx512vl_f64_info>, VEX_W;
defm VPBLENDMD : blendmask_dq<0x64, "vpblendmd", SchedWriteVarBlend,
    avx512vl_i32_info>;
defm VPBLENDMQ : blendmask_dq<0x64, "vpblendmq", SchedWriteVarBlend,
    avx512vl_i64_info>, VEX_W;
defm VPBLENDMB : blendmask_bw<0x66, "vpblendmb", SchedWriteVarBlend,
    avx512vl_i8_info>;
defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", SchedWriteVarBlend,
    avx512vl_i16_info>, VEX_W;

//===----------------------------------------------------------------------===//
// Compare Instructions
//===----------------------------------------------------------------------===//

// avx512_cmp_scalar - AVX512 CMPSS and CMPSD
// Records produced (opcode 0xC2 throughout):
//   rr_Int / rm_Int - vector-register operands, register or scalar-memory
//                     second source; marked SIMD_EXC.
//   rrb_Int         - {sae} register form using OpNodeSAE; Uses = [MXCSR],
//                     EVEX_B.
//   rr / rm         - isCodeGenOnly forms on _.FRC operands; rr is marked
//                     commutable.

multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
                             PatFrag OpNode_su, PatFrag OpNodeSAE_su,
                             X86FoldableSchedWrite sched> {
  defm rr_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
      (outs _.KRC:$dst),
      (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
      "vcmp"#_.Suffix,
      "$cc, $src2, $src1", "$src1, $src2, $cc",
      (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
      (OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                 timm:$cc)>, EVEX_4V, VEX_LIG, Sched<[sched]>, SIMD_EXC;
  let mayLoad = 1 in
  defm rm_Int : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
      (outs _.KRC:$dst),
      (ins _.RC:$src1, _.IntScalarMemOp:$src2, u8imm:$cc),
      "vcmp"#_.Suffix,
      "$cc, $src2, $src1", "$src1, $src2, $cc",
      (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
              timm:$cc),
      (OpNode_su (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
                 timm:$cc)>, EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
      Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;

  let Uses = [MXCSR] in
  defm rrb_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
      (outs _.KRC:$dst),
      (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
      "vcmp"#_.Suffix,
      "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc",
      (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                 timm:$cc),
      (OpNodeSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                    timm:$cc)>,
      EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;

  let isCodeGenOnly = 1 in {
    let isCommutable = 1 in
    def rr : AVX512Ii8<0xC2, MRMSrcReg,
        (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, u8imm:$cc),
        !strconcat("vcmp", _.Suffix,
                   "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
        [(set _.KRC:$dst, (OpNode _.FRC:$src1,
                                  _.FRC:$src2,
                                  timm:$cc))]>,
        EVEX_4V, VEX_LIG, Sched<[sched]>, SIMD_EXC;
    def rm : AVX512Ii8<0xC2, MRMSrcMem,
        (outs _.KRC:$dst),
        (ins _.FRC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
        !strconcat("vcmp", _.Suffix,
                   "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
        [(set _.KRC:$dst, (OpNode _.FRC:$src1,
                                  (_.ScalarLdFrag addr:$src2),
                                  timm:$cc))]>,
        EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
        Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  }
}

// Single-use variants of the scalar compare nodes: the predicate only accepts
// a node for which N->hasOneUse() is true.
def X86cmpms_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                          (X86cmpms node:$src1, node:$src2, node:$cc), [{
  return N->hasOneUse();
}]>;
def X86cmpmsSAE_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                             (X86cmpmsSAE node:$src1, node:$src2, node:$cc), [{
  return N->hasOneUse();
}]>;

let Predicates = [HasAVX512] in {
  let ExeDomain = SSEPackedSingle in
  defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsSAE,
      X86cmpms_su, X86cmpmsSAE_su,
      SchedWriteFCmp.Scl>, AVX512XSIi8Base;
  let ExeDomain = SSEPackedDouble in
  defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsSAE,
      X86cmpms_su, X86cmpmsSAE_su,
      SchedWriteFCmp.Scl>, AVX512XDIi8Base, VEX_W;
}
let Predicates = [HasFP16], ExeDomain = SSEPackedSingle in
  defm VCMPSHZ : avx512_cmp_scalar<f16x_info, X86cmpms, X86cmpmsSAE,
      X86cmpms_su, X86cmpmsSAE_su,
      SchedWriteFCmp.Scl>, AVX512XSIi8Base, TA;

// Packed integer compares writing a mask register: rr / rm plus their
// write-masked rrk / rmk counterparts. All records have an empty pattern
// list; only the register forms take isCommutable from IsCommutable.
multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr,
                              X86FoldableSchedWrite sched,
                              X86VectorVTInfo _, bit IsCommutable> {
  let isCommutable = IsCommutable, hasSideEffects = 0 in
  def rr : AVX512BI<opc, MRMSrcReg,
      (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
      !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
      []>, EVEX_4V, Sched<[sched]>;
  let mayLoad = 1, hasSideEffects = 0 in
  def rm : AVX512BI<opc, MRMSrcMem,
      (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
      !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
      []>, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  let isCommutable = IsCommutable, hasSideEffects = 0 in
  def rrk : AVX512BI<opc, MRMSrcReg,
      (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
      !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
                 "$dst {${mask}}, $src1, $src2}"),
      []>, EVEX_4V, EVEX_K, Sched<[sched]>;
  let mayLoad = 1, hasSideEffects = 0 in
  def rmk : AVX512BI<opc, MRMSrcMem,
      (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
      !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
                 "$dst {${mask}}, $src1, $src2}"),
      []>, EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// Everything avx512_icmp_packed defines, plus the embedded-broadcast memory
// forms rmb / rmbk (EVEX_B, _.ScalarMemOp source).
multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                  bit IsCommutable> :
    avx512_icmp_packed<opc, OpcodeStr, sched, _, IsCommutable> {
  let mayLoad = 1, hasSideEffects = 0 in {
  def rmb : AVX512BI<opc, MRMSrcMem,
      (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
      !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
                 "|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
      []>, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmbk : AVX512BI<opc, MRMSrcMem,
      (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
                              _.ScalarMemOp:$src2),
      !strconcat(OpcodeStr,
                 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
                 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
      []>, EVEX_4V, EVEX_K, EVEX_B,
      Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Width expansion of avx512_icmp_packed: Z under prd, Z256/Z128 under
// prd + HasVLX. IsCommutable defaults to 0.
multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr,
                                 X86SchedWriteWidths sched,
                                 AVX512VLVectorVTInfo VTInfo, Predicate prd,
                                 bit IsCommutable = 0> {
  let Predicates = [prd] in
    defm Z : avx512_icmp_packed<opc, OpcodeStr, sched.ZMM,
                                VTInfo.info512, IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_packed<opc, OpcodeStr, sched.YMM,
                                   VTInfo.info256, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_icmp_packed<opc, OpcodeStr, sched.XMM,
                                   VTInfo.info128, IsCommutable>, EVEX_V128;
  }
}

// Width expansion of avx512_icmp_packed_rmb, with the same predicate scheme.
multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
                                     X86SchedWriteWidths sched,
                                     AVX512VLVectorVTInfo VTInfo,
                                     Predicate prd, bit IsCommutable = 0> {
  let Predicates = [prd] in
    defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.ZMM,
                                    VTInfo.info512, IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.YMM,
                                       VTInfo.info256, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.XMM,
                                       VTInfo.info128, IsCommutable>, EVEX_V128;
  }
}

// This fragment treats X86cmpm as commutable to help match loads in both
// operands for PCMPEQ.
def X86setcc_commute : SDNode<"ISD::SETCC", SDTSetCC, [SDNPCommutative]>;
// setcc with a fixed SETGT condition code.
def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2),
                         (setcc node:$src1, node:$src2, SETGT)>;

// AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
// increase the pattern complexity the way an immediate would.
let AddedComplexity = 2 in {
// FIXME: Is there a better scheduler class for VPCMP?
// The EQ forms pass IsCommutable = 1; the GT forms leave it at the default 0.
defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb",
    SchedWriteVecALU, avx512vl_i8_info, HasBWI, 1>,
    EVEX_CD8<8, CD8VF>, VEX_WIG;

defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw",
    SchedWriteVecALU, avx512vl_i16_info, HasBWI, 1>,
    EVEX_CD8<16, CD8VF>, VEX_WIG;

defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd",
    SchedWriteVecALU, avx512vl_i32_info, HasAVX512, 1>,
    EVEX_CD8<32, CD8VF>;

defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq",
    SchedWriteVecALU, avx512vl_i64_info, HasAVX512, 1>,
    T8PD, VEX_W, EVEX_CD8<64, CD8VF>;

defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb",
    SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
    EVEX_CD8<8, CD8VF>, VEX_WIG;

defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw",
    SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
    EVEX_CD8<16, CD8VF>, VEX_WIG;

defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd",
    SchedWriteVecALU, avx512vl_i32_info, HasAVX512>,
    EVEX_CD8<32, CD8VF>;

defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq",
    SchedWriteVecALU, avx512vl_i64_info, HasAVX512>,
    T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
}

// Converts the condition code held in operand 2 of a setcc node into an i8
// immediate via X86::getVPCMPImmForCond.
def X86pcmpm_imm : SDNodeXForm<setcc, [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  uint8_t SSECC = X86::getVPCMPImmForCond(CC);
  return getI8Imm(SSECC, SDLoc(N));
}]>;

// Swapped operand version of the above.
def X86pcmpm_imm_commute : SDNodeXForm<setcc, [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  uint8_t SSECC = X86::getVPCMPImmForCond(CC);
  SSECC = X86::getSwappedVPCMPImm(SSECC);
  return getI8Imm(SSECC, SDLoc(N));
}]>;

multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
                          PatFrag Frag_su,
                          X86FoldableSchedWrite sched,
                          X86VectorVTInfo _, string Name> {
  let isCommutable = 1 in
  def rri : AVX512AIi8<opc, MRMSrcReg,
      (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
      !strconcat("vpcmp", Suffix,
                 "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
      [(set _.KRC:$dst, (_.KVT (Frag:$cc (_.VT _.RC:$src1),
                                         (_.VT _.RC:$src2),
                                         cond)))]>,
      EVEX_4V, Sched<[sched]>;
  def rmi : AVX512AIi8<opc, MRMSrcMem,
      (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
      !strconcat("vpcmp", Suffix,
                 "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
      [(set _.KRC:$dst, (_.KVT
                         (Frag:$cc
                          (_.VT _.RC:$src1),
                          (_.VT (_.LdFrag addr:$src2)),
                          cond)))]>,
      EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  let isCommutable = 1 in
  def rrik : AVX512AIi8<opc, MRMSrcReg,
      (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
                              u8imm:$cc),
      !strconcat("vpcmp", Suffix,
                 "\t{$cc, $src2, $src1, $dst {${mask}}|",
                 "$dst {${mask}}, $src1, $src2, $cc}"),
      [(set _.KRC:$dst, (and _.KRCWM:$mask,
                             (_.KVT (Frag_su:$cc (_.VT _.RC:$src1),
                                                 (_.VT _.RC:$src2),
                                                 cond))))]>,
      EVEX_4V, EVEX_K,
Sched<[sched]>; 2351 def rmik : AVX512AIi8<opc, MRMSrcMem, 2352 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2, 2353 u8imm:$cc), 2354 !strconcat("vpcmp", Suffix, 2355 "\t{$cc, $src2, $src1, $dst {${mask}}|", 2356 "$dst {${mask}}, $src1, $src2, $cc}"), 2357 [(set _.KRC:$dst, (and _.KRCWM:$mask, 2358 (_.KVT 2359 (Frag_su:$cc 2360 (_.VT _.RC:$src1), 2361 (_.VT (_.LdFrag addr:$src2)), 2362 cond))))]>, 2363 EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>; 2364 2365 def : Pat<(_.KVT (Frag:$cc (_.LdFrag addr:$src2), 2366 (_.VT _.RC:$src1), cond)), 2367 (!cast<Instruction>(Name#_.ZSuffix#"rmi") 2368 _.RC:$src1, addr:$src2, (X86pcmpm_imm_commute $cc))>; 2369 2370 def : Pat<(and _.KRCWM:$mask, 2371 (_.KVT (Frag_su:$cc (_.LdFrag addr:$src2), 2372 (_.VT _.RC:$src1), cond))), 2373 (!cast<Instruction>(Name#_.ZSuffix#"rmik") 2374 _.KRCWM:$mask, _.RC:$src1, addr:$src2, 2375 (X86pcmpm_imm_commute $cc))>; 2376} 2377 2378multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag, 2379 PatFrag Frag_su, X86FoldableSchedWrite sched, 2380 X86VectorVTInfo _, string Name> : 2381 avx512_icmp_cc<opc, Suffix, Frag, Frag_su, sched, _, Name> { 2382 def rmib : AVX512AIi8<opc, MRMSrcMem, 2383 (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2, 2384 u8imm:$cc), 2385 !strconcat("vpcmp", Suffix, 2386 "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|", 2387 "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"), 2388 [(set _.KRC:$dst, (_.KVT (Frag:$cc 2389 (_.VT _.RC:$src1), 2390 (_.BroadcastLdFrag addr:$src2), 2391 cond)))]>, 2392 EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 2393 def rmibk : AVX512AIi8<opc, MRMSrcMem, 2394 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, 2395 _.ScalarMemOp:$src2, u8imm:$cc), 2396 !strconcat("vpcmp", Suffix, 2397 "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|", 2398 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"), 2399 [(set _.KRC:$dst, (and _.KRCWM:$mask, 2400 (_.KVT (Frag_su:$cc 
2401 (_.VT _.RC:$src1), 2402 (_.BroadcastLdFrag addr:$src2), 2403 cond))))]>, 2404 EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 2405 2406 def : Pat<(_.KVT (Frag:$cc (_.BroadcastLdFrag addr:$src2), 2407 (_.VT _.RC:$src1), cond)), 2408 (!cast<Instruction>(Name#_.ZSuffix#"rmib") 2409 _.RC:$src1, addr:$src2, (X86pcmpm_imm_commute $cc))>; 2410 2411 def : Pat<(and _.KRCWM:$mask, 2412 (_.KVT (Frag_su:$cc (_.BroadcastLdFrag addr:$src2), 2413 (_.VT _.RC:$src1), cond))), 2414 (!cast<Instruction>(Name#_.ZSuffix#"rmibk") 2415 _.KRCWM:$mask, _.RC:$src1, addr:$src2, 2416 (X86pcmpm_imm_commute $cc))>; 2417} 2418 2419multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, PatFrag Frag, 2420 PatFrag Frag_su, X86SchedWriteWidths sched, 2421 AVX512VLVectorVTInfo VTInfo, Predicate prd> { 2422 let Predicates = [prd] in 2423 defm Z : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, 2424 sched.ZMM, VTInfo.info512, NAME>, EVEX_V512; 2425 2426 let Predicates = [prd, HasVLX] in { 2427 defm Z256 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, 2428 sched.YMM, VTInfo.info256, NAME>, EVEX_V256; 2429 defm Z128 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, 2430 sched.XMM, VTInfo.info128, NAME>, EVEX_V128; 2431 } 2432} 2433 2434multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, PatFrag Frag, 2435 PatFrag Frag_su, X86SchedWriteWidths sched, 2436 AVX512VLVectorVTInfo VTInfo, Predicate prd> { 2437 let Predicates = [prd] in 2438 defm Z : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, 2439 sched.ZMM, VTInfo.info512, NAME>, EVEX_V512; 2440 2441 let Predicates = [prd, HasVLX] in { 2442 defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, 2443 sched.YMM, VTInfo.info256, NAME>, EVEX_V256; 2444 defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, 2445 sched.XMM, VTInfo.info128, NAME>, EVEX_V128; 2446 } 2447} 2448 2449def X86pcmpm : PatFrag<(ops node:$src1, node:$src2, node:$cc), 2450 (setcc node:$src1, node:$src2, node:$cc), [{ 2451 ISD::CondCode CC = 
cast<CondCodeSDNode>(N->getOperand(2))->get(); 2452 return !ISD::isUnsignedIntSetCC(CC); 2453}], X86pcmpm_imm>; 2454 2455def X86pcmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc), 2456 (setcc node:$src1, node:$src2, node:$cc), [{ 2457 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); 2458 return N->hasOneUse() && !ISD::isUnsignedIntSetCC(CC); 2459}], X86pcmpm_imm>; 2460 2461def X86pcmpum : PatFrag<(ops node:$src1, node:$src2, node:$cc), 2462 (setcc node:$src1, node:$src2, node:$cc), [{ 2463 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); 2464 return ISD::isUnsignedIntSetCC(CC); 2465}], X86pcmpm_imm>; 2466 2467def X86pcmpum_su : PatFrag<(ops node:$src1, node:$src2, node:$cc), 2468 (setcc node:$src1, node:$src2, node:$cc), [{ 2469 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); 2470 return N->hasOneUse() && ISD::isUnsignedIntSetCC(CC); 2471}], X86pcmpm_imm>; 2472 2473// FIXME: Is there a better scheduler class for VPCMP/VPCMPU? 2474defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86pcmpm, X86pcmpm_su, 2475 SchedWriteVecALU, avx512vl_i8_info, HasBWI>, 2476 EVEX_CD8<8, CD8VF>; 2477defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86pcmpum, X86pcmpum_su, 2478 SchedWriteVecALU, avx512vl_i8_info, HasBWI>, 2479 EVEX_CD8<8, CD8VF>; 2480 2481defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86pcmpm, X86pcmpm_su, 2482 SchedWriteVecALU, avx512vl_i16_info, HasBWI>, 2483 VEX_W, EVEX_CD8<16, CD8VF>; 2484defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86pcmpum, X86pcmpum_su, 2485 SchedWriteVecALU, avx512vl_i16_info, HasBWI>, 2486 VEX_W, EVEX_CD8<16, CD8VF>; 2487 2488defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86pcmpm, X86pcmpm_su, 2489 SchedWriteVecALU, avx512vl_i32_info, 2490 HasAVX512>, EVEX_CD8<32, CD8VF>; 2491defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86pcmpum, X86pcmpum_su, 2492 SchedWriteVecALU, avx512vl_i32_info, 2493 HasAVX512>, EVEX_CD8<32, CD8VF>; 2494 2495defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86pcmpm, 
X86pcmpm_su, 2496 SchedWriteVecALU, avx512vl_i64_info, 2497 HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>; 2498defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86pcmpum, X86pcmpum_su, 2499 SchedWriteVecALU, avx512vl_i64_info, 2500 HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>; 2501 2502def X86cmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc), 2503 (X86cmpm node:$src1, node:$src2, node:$cc), [{ 2504 return N->hasOneUse(); 2505}]>; 2506 2507def X86cmpm_imm_commute : SDNodeXForm<timm, [{ 2508 uint8_t Imm = X86::getSwappedVCMPImm(N->getZExtValue() & 0x1f); 2509 return getI8Imm(Imm, SDLoc(N)); 2510}]>; 2511 2512multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _, 2513 string Name> { 2514let Uses = [MXCSR], mayRaiseFPException = 1 in { 2515 defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _, 2516 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,u8imm:$cc), 2517 "vcmp"#_.Suffix, 2518 "$cc, $src2, $src1", "$src1, $src2, $cc", 2519 (X86any_cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc), 2520 (X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc), 2521 1>, Sched<[sched]>; 2522 2523 defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _, 2524 (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc), 2525 "vcmp"#_.Suffix, 2526 "$cc, $src2, $src1", "$src1, $src2, $cc", 2527 (X86any_cmpm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), 2528 timm:$cc), 2529 (X86cmpm_su (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), 2530 timm:$cc)>, 2531 Sched<[sched.Folded, sched.ReadAfterFold]>; 2532 2533 defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _, 2534 (outs _.KRC:$dst), 2535 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc), 2536 "vcmp"#_.Suffix, 2537 "$cc, ${src2}"#_.BroadcastStr#", $src1", 2538 "$src1, ${src2}"#_.BroadcastStr#", $cc", 2539 (X86any_cmpm (_.VT _.RC:$src1), 2540 (_.VT (_.BroadcastLdFrag addr:$src2)), 2541 timm:$cc), 2542 (X86cmpm_su (_.VT _.RC:$src1), 2543 (_.VT (_.BroadcastLdFrag addr:$src2)), 2544 timm:$cc)>, 2545 EVEX_B, 
Sched<[sched.Folded, sched.ReadAfterFold]>; 2546 } 2547 2548 // Patterns for selecting with loads in other operand. 2549 def : Pat<(X86any_cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1), 2550 timm:$cc), 2551 (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2, 2552 (X86cmpm_imm_commute timm:$cc))>; 2553 2554 def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.LdFrag addr:$src2), 2555 (_.VT _.RC:$src1), 2556 timm:$cc)), 2557 (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask, 2558 _.RC:$src1, addr:$src2, 2559 (X86cmpm_imm_commute timm:$cc))>; 2560 2561 def : Pat<(X86any_cmpm (_.BroadcastLdFrag addr:$src2), 2562 (_.VT _.RC:$src1), timm:$cc), 2563 (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2, 2564 (X86cmpm_imm_commute timm:$cc))>; 2565 2566 def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.BroadcastLdFrag addr:$src2), 2567 (_.VT _.RC:$src1), 2568 timm:$cc)), 2569 (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask, 2570 _.RC:$src1, addr:$src2, 2571 (X86cmpm_imm_commute timm:$cc))>; 2572 2573 // Patterns for mask intrinsics. 
// X86cmpmm carries its mask as an explicit fourth operand. In the patterns
// below an all-ones mask selects the unmasked instruction (rri/rmi/rmbi) and a
// mask register selects the matching "k" form, where the mask becomes the
// first instruction operand.

  // Register, register.
  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc,
                      (_.KVT immAllOnesV)),
            (!cast<Instruction>(Name#_.ZSuffix#"rri") _.RC:$src1, _.RC:$src2, timm:$cc)>;

  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#"rrik") _.KRCWM:$mask, _.RC:$src1,
                                                       _.RC:$src2, timm:$cc)>;

  // Register, full-width load.
  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), timm:$cc,
                      (_.KVT immAllOnesV)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2, timm:$cc)>;

  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), timm:$cc,
                      _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask, _.RC:$src1,
                                                       addr:$src2, timm:$cc)>;

  // Register, broadcast load.
  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc,
                      (_.KVT immAllOnesV)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2, timm:$cc)>;

  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc,
                      _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask, _.RC:$src1,
                                                        addr:$src2, timm:$cc)>;

  // Patterns for mask intrinsics with loads in other operand.
// Here the load (full-width or broadcast) is the FIRST compare operand. The
// selected instructions take memory as $src2, so the operands are swapped and
// the condition immediate is rewritten through X86cmpm_imm_commute.

  def : Pat<(X86cmpmm (_.VT (_.LdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
                      (_.KVT immAllOnesV)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
                                                      (X86cmpm_imm_commute timm:$cc))>;

  def : Pat<(X86cmpmm (_.VT (_.LdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
                      _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
                                                       _.RC:$src1, addr:$src2,
                                                       (X86cmpm_imm_commute timm:$cc))>;

  def : Pat<(X86cmpmm (_.VT (_.BroadcastLdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
                      (_.KVT immAllOnesV)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
                                                       (X86cmpm_imm_commute timm:$cc))>;

  def : Pat<(X86cmpmm (_.VT (_.BroadcastLdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
                      _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
                                                        _.RC:$src1, addr:$src2,
                                                        (X86cmpm_imm_commute timm:$cc))>;
}

// Register-register packed FP compare with {sae} in the asm string and EVEX_B
// set. Defines a single record family, rrib, for the vector type `_`.
multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  // comparison code form (VCMP[EQ/LT/LE/...])
// rrib: the first pattern list is the unmasked form (all-ones mask operand on
// X86cmpmmSAE), the second is the masked form (mask taken from _.KRCWM).
  // Reads MXCSR; unlike avx512_vcmp_common this is not marked
  // mayRaiseFPException.
  let Uses = [MXCSR] in
  defm  rrib  : AVX512_maskable_custom_cmp<0xC2, MRMSrcReg, (outs _.KRC:$dst),
                     (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                     (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2, u8imm:$cc),
                     "vcmp"#_.Suffix,
                     "$cc, {sae}, $src2, $src1",
                     "$src1, $src2, {sae}, $cc",
                     [(set _.KRC:$dst, (X86cmpmmSAE (_.VT _.RC:$src1),
                                        (_.VT _.RC:$src2), timm:$cc, (_.KVT immAllOnesV)))],
                     [(set _.KRC:$dst, (X86cmpmmSAE (_.VT _.RC:$src1),
                                        (_.VT _.RC:$src2), timm:$cc, _.KRCWM:$mask))]>,
                     EVEX_B, Sched<[sched]>;
}

// Instantiates the packed FP compare for all three vector widths of `_`:
//   Z     (info512) - guarded by [Pred]; also gets the {sae} form.
//   Z128  (info128) - guarded by [Pred, HasVLX].
//   Z256  (info256) - guarded by [Pred, HasVLX].
// Pred defaults to HasAVX512. NAME is forwarded so the patterns inside
// avx512_vcmp_common can !cast the generated instruction records by name.
multiclass avx512_vcmp<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _,
                       Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in {
    defm Z    : avx512_vcmp_common<sched.ZMM, _.info512, NAME>,
                avx512_vcmp_sae<sched.ZMM, _.info512>, EVEX_V512;

  }
  let Predicates = [Pred,HasVLX] in {
   defm Z128 : avx512_vcmp_common<sched.XMM, _.info128, NAME>, EVEX_V128;
   defm Z256 : avx512_vcmp_common<sched.YMM, _.info256, NAME>, EVEX_V256;
  }
}

// f64 / f32 / f16 packed compares. Only the f16 variant overrides the
// predicate (HasFP16); the other two use the HasAVX512 default.
defm VCMPPD : avx512_vcmp<SchedWriteFCmp, avx512vl_f64_info>,
                          AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VCMPPS : avx512_vcmp<SchedWriteFCmp, avx512vl_f32_info>,
                          AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
defm VCMPPH : avx512_vcmp<SchedWriteFCmp, avx512vl_f16_info, HasFP16>,
                          AVX512PSIi8Base, EVEX_4V, EVEX_CD8<16, CD8VF>, TA;

// Patterns to select fp compares with load as first operand.
// Scalar compares whose FIRST operand is the load: select the rm form with the
// operands swapped and the immediate rewritten by X86cmpm_imm_commute.
let Predicates = [HasAVX512] in {
  def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1, timm:$cc)),
            (VCMPSDZrm FR64X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;

  def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1, timm:$cc)),
            (VCMPSSZrm FR32X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
}

let Predicates = [HasFP16] in {
  def : Pat<(v1i1 (X86cmpms (loadf16 addr:$src2), FR16X:$src1, timm:$cc)),
            (VCMPSHZrm FR16X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
}

// ----------------------------------------------------------------
// FPClass

// Single-use variants of the fpclass nodes. They are used below wherever the
// node result is folded into an `and` with the write-mask.
def X86Vfpclasss_su : PatFrag<(ops node:$src1, node:$src2),
                              (X86Vfpclasss node:$src1, node:$src2), [{
  return N->hasOneUse();
}]>;

def X86Vfpclass_su : PatFrag<(ops node:$src1, node:$src2),
                             (X86Vfpclass node:$src1, node:$src2), [{
  return N->hasOneUse();
}]>;

// Scalar fpclass: mask = op(reg_scalar, imm)
//                 mask = op(mem_scalar, imm)
// Defines rr / rrk (register source) and rm / rmk (memory source). The "k"
// forms AND the node result with the write-mask and add EVEX_K. All four
// records are guarded by [prd].
multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                 Predicate prd> {
  let Predicates = [prd], ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
      def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                      (ins _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(set _.KRC:$dst,(X86Vfpclasss (_.VT _.RC:$src1),
                              (i32 timm:$src2)))]>,
                      Sched<[sched]>;
      def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                      (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr#_.Suffix#
                      "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                      [(set _.KRC:$dst,(and _.KRCWM:$mask,
                                      (X86Vfpclasss_su (_.VT _.RC:$src1),
                                      (i32 timm:$src2))))]>,
                      EVEX_K, Sched<[sched]>;
    // Memory forms use the intrinsic scalar operand / load fragments of `_`.
    def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.IntScalarMemOp:$src1, i32u8imm:$src2),
                    OpcodeStr#_.Suffix#
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.KRC:$dst,
                          (X86Vfpclasss (_.ScalarIntMemFrags addr:$src1),
                                        (i32 timm:$src2)))]>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
    def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2),
                    OpcodeStr#_.Suffix#
                    "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                    [(set _.KRC:$dst,(and _.KRCWM:$mask,
                        (X86Vfpclasss_su (_.ScalarIntMemFrags addr:$src1),
                            (i32 timm:$src2))))]>,
                    EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Vector fpclass. Each form has ONE data source plus the immediate:
//   mask = fpclass(reg_vec, imm)             rr  / rrk
//   mask = fpclass(mem_vec, imm)             rm  / rmk
//   mask = fpclass(broadcast(eltVt), imm)    rmb / rmbk
// `mem` is a width suffix appended, wrapped in "{...}", to the mnemonic of the
// full-width memory forms only (rm / rmk).
// NOTE(review): the "{"#mem#"}" wrapping presumably restricts the suffix to
// the first (AT&T) asm variant - confirm against the asm-string variant rules.
multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                 string mem>{
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
  def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                      (ins _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(set _.KRC:$dst,(X86Vfpclass (_.VT _.RC:$src1),
                                       (i32 timm:$src2)))]>,
                      Sched<[sched]>;
  def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                      (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr#_.Suffix#
                      "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                      [(set _.KRC:$dst,(and _.KRCWM:$mask,
                                       (X86Vfpclass_su (_.VT _.RC:$src1),
                                       (i32 timm:$src2))))]>,
                      EVEX_K, Sched<[sched]>;
  def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.MemOp:$src1, i32u8imm:$src2),
                    OpcodeStr#_.Suffix#"{"#mem#"}"#
                    "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.KRC:$dst,(X86Vfpclass
                                     (_.VT (_.LdFrag addr:$src1)),
                                     (i32 timm:$src2)))]>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
                    OpcodeStr#_.Suffix#"{"#mem#"}"#
                    "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                    [(set _.KRC:$dst, (and _.KRCWM:$mask, (X86Vfpclass_su
                                  (_.VT (_.LdFrag addr:$src1)),
                                  (i32 timm:$src2))))]>,
                    EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
  // Broadcast forms: scalar memory operand, EVEX_B, and the {1toN} string
  // from _.BroadcastStr spliced into the asm operand.
  def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
                    OpcodeStr#_.Suffix#"\t{$src2, ${src1}"#
                                      _.BroadcastStr#", $dst|$dst, ${src1}"
                                                  #_.BroadcastStr#", $src2}",
                    [(set _.KRC:$dst,(X86Vfpclass
                                     (_.VT (_.BroadcastLdFrag addr:$src1)),
                                     (i32 timm:$src2)))]>,
                    EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
                    OpcodeStr#_.Suffix#"\t{$src2, ${src1}"#
                          _.BroadcastStr#", $dst {${mask}}|$dst {${mask}}, ${src1}"#
                                                   _.BroadcastStr#", $src2}",
                    [(set _.KRC:$dst,(and _.KRCWM:$mask, (X86Vfpclass_su
                                     (_.VT (_.BroadcastLdFrag addr:$src1)),
                                     (i32 timm:$src2))))]>,
                    EVEX_B, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }

  // Allow registers or broadcast with the x, y, z suffix we use to disambiguate
  // the memory form.
// Aliases spelling the mnemonic as OpcodeStr # Suffix # mem for the forms that
// do not carry the suffix themselves: rr, rrk, rmb and rmbk. Every alias is
// declared with the trailing arguments `0, "att"`.
  def : InstAlias<OpcodeStr#_.Suffix#mem#
                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  (!cast<Instruction>(NAME#"rr")
                   _.KRC:$dst, _.RC:$src1, i32u8imm:$src2), 0, "att">;
  def : InstAlias<OpcodeStr#_.Suffix#mem#
                  "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                  (!cast<Instruction>(NAME#"rrk")
                   _.KRC:$dst, _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2), 0, "att">;
  def : InstAlias<OpcodeStr#_.Suffix#mem#
                  "\t{$src2, ${src1}"#_.BroadcastStr#", $dst|$dst, ${src1}"#
                  _.BroadcastStr#", $src2}",
                  (!cast<Instruction>(NAME#"rmb")
                   _.KRC:$dst, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
  def : InstAlias<OpcodeStr#_.Suffix#mem#
                  "\t{$src2, ${src1}"#_.BroadcastStr#", $dst {${mask}}|"
                  "$dst {${mask}}, ${src1}"#_.BroadcastStr#", $src2}",
                  (!cast<Instruction>(NAME#"rmbk")
                   _.KRC:$dst, _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
}

// Instantiates avx512_vector_fpclass for the three widths of `_`:
//   Z    (info512, suffix "z") under [prd]
//   Z128 (info128, suffix "x") under [prd, HasVLX]
//   Z256 (info256, suffix "y") under [prd, HasVLX]
multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _,
                                     bits<8> opc, X86SchedWriteWidths sched,
                                     Predicate prd>{
  let Predicates = [prd] in {
    defm Z    : avx512_vector_fpclass<opc, OpcodeStr, sched.ZMM,
                                      _.info512, "z">, EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, sched.XMM,
                                      _.info128, "x">, EVEX_V128;
    defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, sched.YMM,
                                      _.info256, "y">, EVEX_V256;
  }
}

// All fpclass variants. Packed forms use opcVec, scalar forms use opcScalar.
// The f16 variants (PH, SHZ) are guarded by HasFP16; the f32/f64 variants
// (PS, PD, SSZ, SDZ) by HasDQI.
multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
                                 bits<8> opcScalar, X86SchedWriteWidths sched> {
  defm PH : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f16_info, opcVec,
                                      sched, HasFP16>,
                                      EVEX_CD8<16, CD8VF>, AVX512PSIi8Base, TA;
  defm SHZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
                                   sched.Scl, f16x_info, HasFP16>,
                                   EVEX_CD8<16, CD8VT1>, AVX512PSIi8Base, TA;
  defm PS : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f32_info, opcVec,
                                      sched, HasDQI>,
                                      EVEX_CD8<32, CD8VF>, AVX512AIi8Base;
  defm PD : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f64_info, opcVec,
                                      sched, HasDQI>,
                                      EVEX_CD8<64, CD8VF>, AVX512AIi8Base, VEX_W;
  defm SSZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
                                   sched.Scl, f32x_info, HasDQI>, VEX_LIG,
                                   EVEX_CD8<32, CD8VT1>, AVX512AIi8Base;
  defm SDZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
                                   sched.Scl, f64x_info, HasDQI>, VEX_LIG,
                                   EVEX_CD8<64, CD8VT1>, AVX512AIi8Base, VEX_W;
}

defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, SchedWriteFCmp>, EVEX;

//-----------------------------------------------------------------
// Mask register copy, including
// - copy between mask registers
// - load/store mask registers
// - copy from GPR to mask register and vice versa
//

// Mask <-> mask and mask <-> memory moves for one mask register class:
//   kk - register copy (no selection pattern)
//   km - load from x86memop, selected for a `vvt` load
//   mk - store to x86memop, selected for a store of KRC
multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
                           string OpcodeStr, RegisterClass KRC,
                           ValueType vvt, X86MemOperand x86memop> {
  let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove] in
  def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
             Sched<[WriteMove]>;
  def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
             [(set KRC:$dst, (vvt (load addr:$src)))]>,
             Sched<[WriteLoad]>;
  def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
             [(store KRC:$src, addr:$dst)]>,
             Sched<[WriteStore]>;
}

// Mask <-> GPR moves: kr (GPR to mask) and rk (mask to GPR). Neither record
// has a selection pattern of its own.
multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
                               string OpcodeStr,
                               RegisterClass KRC, RegisterClass GRC> {
  let hasSideEffects = 0 in {
    def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
               Sched<[WriteMove]>;
    def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
               Sched<[WriteMove]>;
  }
}

// KMOVB needs DQI, KMOVW needs AVX512, KMOVD/KMOVQ need BWI. The GPR side is
// GR32 for the B/W/D variants and GR64 for the Q variant.
let Predicates = [HasDQI] in
  defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
               avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
               VEX, PD;

let Predicates = [HasAVX512] in
  defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
               avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
               VEX, PS;

let Predicates = [HasBWI] in {
  defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem>,
               VEX, PD, VEX_W;
  defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
               VEX, XD;
  defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>,
               VEX, PS, VEX_W;
  defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
               VEX, XD, VEX_W;
}

// GR from/to mask register
// 16-bit and 8-bit values go through a 32-bit GPR: INSERT_SUBREG into an
// IMPLICIT_DEF on the way in, EXTRACT_SUBREG on the way out.
def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
          (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>;
def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>;
def : Pat<(i8 (trunc (i16 (bitconvert (v16i1 VK16:$src))))),
          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_8bit)>;

def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
          (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>;
def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit)>;

// Extensions of a 16-bit mask: zext selects KMOVWrk, anyext is a plain
// register-class copy.
def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
          (KMOVWrk VK16:$src)>;
def : Pat<(i64 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
          (SUBREG_TO_REG (i64 0), (KMOVWrk VK16:$src), sub_32bit)>;
def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
          (COPY_TO_REGCLASS VK16:$src, GR32)>;
def : Pat<(i64 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
          (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK16:$src, GR32), sub_32bit)>;

// Extensions of an 8-bit mask: the zext forms select KMOVBrk and therefore
// require DQI; the anyext forms carry no predicate.
def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
          (KMOVBrk VK8:$src)>, Requires<[HasDQI]>;
def : Pat<(i64 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
          (SUBREG_TO_REG (i64 0), (KMOVBrk VK8:$src), sub_32bit)>, Requires<[HasDQI]>;
def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
          (COPY_TO_REGCLASS VK8:$src, GR32)>;
def : Pat<(i64 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
          (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK8:$src, GR32), sub_32bit)>;

// 32- and 64-bit masks convert with a direct register-class copy.
def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
          (COPY_TO_REGCLASS GR32:$src, VK32)>;
def : Pat<(i32 (bitconvert (v32i1 VK32:$src))),
          (COPY_TO_REGCLASS VK32:$src, GR32)>;
def : Pat<(v64i1 (bitconvert (i64 GR64:$src))),
          (COPY_TO_REGCLASS GR64:$src, VK64)>;
def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
          (COPY_TO_REGCLASS VK64:$src, GR64)>;

// Load/store kreg
// With DQI, loads of masks narrower than 8 bits select KMOVBkm and copy the
// result into the narrower mask class.
let Predicates = [HasDQI] in {
  def : Pat<(v1i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>;
  def : Pat<(v2i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>;
  def : Pat<(v4i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>;
}

// Under plain AVX512 an i8 load bitcast to v8i1 goes through a zero-extending
// GPR load (MOVZX32rm8); an i16 load bitcast to v16i1 selects KMOVWkm directly.
let Predicates = [HasAVX512] in {
  def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
            (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
  def : Pat<(v16i1 (bitconvert (loadi16 addr:$src))),
            (KMOVWkm addr:$src)>;
}

// ISD::EXTRACT_VECTOR_ELT constrained to: i8 result, vector-of-i1 first
// operand, pointer-typed index.
def X86kextract : SDNode<"ISD::EXTRACT_VECTOR_ELT",
                         SDTypeProfile<1, 2, [SDTCisVT<0, i8>,
                                              SDTCVecEltisVT<1, i1>,
                                              SDTCisPtrTy<2>]>>;

let Predicates = [HasAVX512] in {
  // Per mask class: scalar_to_vector from GR32/GR8 and extraction of element 0
  // are lowered to register-class copies (through a 32-bit GPR for the 8-bit
  // cases).
  multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC, ValueType maskVT> {
    def : Pat<(maskVT (scalar_to_vector GR32:$src)),
              (COPY_TO_REGCLASS GR32:$src, maskRC)>;

    def : Pat<(maskVT (scalar_to_vector GR8:$src)),
              (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>;

    def : Pat<(i8 (X86kextract maskRC:$src, (iPTR 0))),
              (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS maskRC:$src, GR32)), sub_8bit)>;

    def : Pat<(i32 (anyext (i8 (X86kextract maskRC:$src, (iPTR 0))))),
              (i32 (COPY_TO_REGCLASS maskRC:$src, GR32))>;
  }

  defm : operation_gpr_mask_copy_lowering<VK1,  v1i1>;
  defm : operation_gpr_mask_copy_lowering<VK2,  v2i1>;
  defm : operation_gpr_mask_copy_lowering<VK4,  v4i1>;
  defm : operation_gpr_mask_copy_lowering<VK8,  v8i1>;
  defm : operation_gpr_mask_copy_lowering<VK16,  v16i1>;
  defm : operation_gpr_mask_copy_lowering<VK32,  v32i1>;
  defm : operation_gpr_mask_copy_lowering<VK64,  v64i1>;

  // Inserting a single bit into an all-zero v16i1: AND the GPR with 1 so only
  // bit 0 survives, then move it into a mask register.
  def : Pat<(insert_subvector (v16i1 immAllZerosV),
                              (v1i1 (scalar_to_vector GR8:$src)), (iPTR 0)),
            (KMOVWkr (AND32ri8
                      (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
                      (i32 1)))>;
}

// Mask unary operation
// - KNOT
// One register-to-register record (rr) applying OpNode to a KRC register,
// guarded by [prd].
multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
                            RegisterClass KRC, SDPatternOperator OpNode,
                            X86FoldableSchedWrite sched, Predicate prd> {
  let Predicates = [prd] in
    def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
               [(set KRC:$dst, (OpNode KRC:$src))]>,
               Sched<[sched]>;
}

// B/W/D/Q variants of a unary mask op. The mnemonic is OpcodeStr plus the
// width letter; each variant uses the predicate passed to avx512_mask_unop.
multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
                                SDPatternOperator OpNode,
                                X86FoldableSchedWrite sched> {
  defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
                            sched, HasDQI>, VEX, PD;
  defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
                            sched, HasAVX512>, VEX, PS;
  defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
                            sched, HasBWI>, VEX, PD, VEX_W;
3014 defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode, 3015 sched, HasBWI>, VEX, PS, VEX_W; 3016} 3017 3018// TODO - do we need a X86SchedWriteWidths::KMASK type? 3019defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SchedWriteVecLogic.XMM>; 3020 3021// KNL does not support KMOVB, 8-bit mask is promoted to 16-bit 3022let Predicates = [HasAVX512, NoDQI] in 3023def : Pat<(vnot VK8:$src), 3024 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>; 3025 3026def : Pat<(vnot VK4:$src), 3027 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>; 3028def : Pat<(vnot VK2:$src), 3029 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>; 3030def : Pat<(vnot VK1:$src), 3031 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK1:$src, VK16)), VK2)>; 3032 3033// Mask binary operation 3034// - KAND, KANDN, KOR, KXNOR, KXOR 3035multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr, 3036 RegisterClass KRC, SDPatternOperator OpNode, 3037 X86FoldableSchedWrite sched, Predicate prd, 3038 bit IsCommutable> { 3039 let Predicates = [prd], isCommutable = IsCommutable in 3040 def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2), 3041 !strconcat(OpcodeStr, 3042 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 3043 [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>, 3044 Sched<[sched]>; 3045} 3046 3047multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr, 3048 SDPatternOperator OpNode, 3049 X86FoldableSchedWrite sched, bit IsCommutable, 3050 Predicate prdW = HasAVX512> { 3051 defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode, 3052 sched, HasDQI, IsCommutable>, VEX_4V, VEX_L, PD; 3053 defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode, 3054 sched, prdW, IsCommutable>, VEX_4V, VEX_L, PS; 3055 defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode, 3056 sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PD; 3057 defm Q : 
avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode, 3058 sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PS; 3059} 3060 3061// These nodes use 'vnot' instead of 'not' to support vectors. 3062def vandn : PatFrag<(ops node:$i0, node:$i1), (and (vnot node:$i0), node:$i1)>; 3063def vxnor : PatFrag<(ops node:$i0, node:$i1), (vnot (xor node:$i0, node:$i1))>; 3064 3065// TODO - do we need a X86SchedWriteWidths::KMASK type? 3066defm KAND : avx512_mask_binop_all<0x41, "kand", and, SchedWriteVecLogic.XMM, 1>; 3067defm KOR : avx512_mask_binop_all<0x45, "kor", or, SchedWriteVecLogic.XMM, 1>; 3068defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor, SchedWriteVecLogic.XMM, 1>; 3069defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor, SchedWriteVecLogic.XMM, 1>; 3070defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn, SchedWriteVecLogic.XMM, 0>; 3071defm KADD : avx512_mask_binop_all<0x4A, "kadd", X86kadd, SchedWriteVecLogic.XMM, 1, HasDQI>; 3072 3073multiclass avx512_binop_pat<SDPatternOperator VOpNode, 3074 Instruction Inst> { 3075 // With AVX512F, 8-bit mask is promoted to 16-bit mask, 3076 // for the DQI set, this type is legal and KxxxB instruction is used 3077 let Predicates = [NoDQI] in 3078 def : Pat<(VOpNode VK8:$src1, VK8:$src2), 3079 (COPY_TO_REGCLASS 3080 (Inst (COPY_TO_REGCLASS VK8:$src1, VK16), 3081 (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>; 3082 3083 // All types smaller than 8 bits require conversion anyway 3084 def : Pat<(VOpNode VK1:$src1, VK1:$src2), 3085 (COPY_TO_REGCLASS (Inst 3086 (COPY_TO_REGCLASS VK1:$src1, VK16), 3087 (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>; 3088 def : Pat<(VOpNode VK2:$src1, VK2:$src2), 3089 (COPY_TO_REGCLASS (Inst 3090 (COPY_TO_REGCLASS VK2:$src1, VK16), 3091 (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>; 3092 def : Pat<(VOpNode VK4:$src1, VK4:$src2), 3093 (COPY_TO_REGCLASS (Inst 3094 (COPY_TO_REGCLASS VK4:$src1, VK16), 3095 (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>; 3096} 3097 3098defm : avx512_binop_pat<and, 
KANDWrr>; 3099defm : avx512_binop_pat<vandn, KANDNWrr>; 3100defm : avx512_binop_pat<or, KORWrr>; 3101defm : avx512_binop_pat<vxnor, KXNORWrr>; 3102defm : avx512_binop_pat<xor, KXORWrr>; 3103 3104// Mask unpacking 3105multiclass avx512_mask_unpck<string Suffix, X86KVectorVTInfo Dst, 3106 X86KVectorVTInfo Src, X86FoldableSchedWrite sched, 3107 Predicate prd> { 3108 let Predicates = [prd] in { 3109 let hasSideEffects = 0 in 3110 def rr : I<0x4b, MRMSrcReg, (outs Dst.KRC:$dst), 3111 (ins Src.KRC:$src1, Src.KRC:$src2), 3112 "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, 3113 VEX_4V, VEX_L, Sched<[sched]>; 3114 3115 def : Pat<(Dst.KVT (concat_vectors Src.KRC:$src1, Src.KRC:$src2)), 3116 (!cast<Instruction>(NAME#rr) Src.KRC:$src2, Src.KRC:$src1)>; 3117 } 3118} 3119 3120defm KUNPCKBW : avx512_mask_unpck<"bw", v16i1_info, v8i1_info, WriteShuffle, HasAVX512>, PD; 3121defm KUNPCKWD : avx512_mask_unpck<"wd", v32i1_info, v16i1_info, WriteShuffle, HasBWI>, PS; 3122defm KUNPCKDQ : avx512_mask_unpck<"dq", v64i1_info, v32i1_info, WriteShuffle, HasBWI>, PS, VEX_W; 3123 3124// Mask bit testing 3125multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC, 3126 SDNode OpNode, X86FoldableSchedWrite sched, 3127 Predicate prd> { 3128 let Predicates = [prd], Defs = [EFLAGS] in 3129 def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2), 3130 !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), 3131 [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>, 3132 Sched<[sched]>; 3133} 3134 3135multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode, 3136 X86FoldableSchedWrite sched, 3137 Predicate prdW = HasAVX512> { 3138 defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, sched, HasDQI>, 3139 VEX, PD; 3140 defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, sched, prdW>, 3141 VEX, PS; 3142 defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, sched, HasBWI>, 3143 VEX, PS, VEX_W; 3144 defm D : 
avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, sched, HasBWI>, 3145 VEX, PD, VEX_W; 3146} 3147 3148// TODO - do we need a X86SchedWriteWidths::KMASK type? 3149defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SchedWriteVecLogic.XMM>; 3150defm KTEST : avx512_mask_testop_w<0x99, "ktest", X86ktest, SchedWriteVecLogic.XMM, HasDQI>; 3151 3152// Mask shift 3153multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC, 3154 SDNode OpNode, X86FoldableSchedWrite sched> { 3155 let Predicates = [HasAVX512] in 3156 def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm), 3157 !strconcat(OpcodeStr, 3158 "\t{$imm, $src, $dst|$dst, $src, $imm}"), 3159 [(set KRC:$dst, (OpNode KRC:$src, (i8 timm:$imm)))]>, 3160 Sched<[sched]>; 3161} 3162 3163multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr, 3164 SDNode OpNode, X86FoldableSchedWrite sched> { 3165 defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode, 3166 sched>, VEX, TAPD, VEX_W; 3167 let Predicates = [HasDQI] in 3168 defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode, 3169 sched>, VEX, TAPD; 3170 let Predicates = [HasBWI] in { 3171 defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode, 3172 sched>, VEX, TAPD, VEX_W; 3173 defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode, 3174 sched>, VEX, TAPD; 3175 } 3176} 3177 3178defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, WriteShuffle>; 3179defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, WriteShuffle>; 3180 3181// Patterns for comparing 128/256-bit integer vectors using 512-bit instruction. 
// Register/register integer compares on a Narrow vector type, selected onto
// the Wide (Z-suffixed) compare instruction named by InstStr. Both operands
// are placed into an IMPLICIT_DEF wide register at Narrow.SubRegIdx and the
// resulting mask is copied back into Narrow.KRC.
multiclass axv512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
                                                 string InstStr,
                                                 X86VectorVTInfo Narrow,
                                                 X86VectorVTInfo Wide> {
  // Unmasked compare.
  def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
                                  (Narrow.VT Narrow.RC:$src2), cond)),
            (COPY_TO_REGCLASS
             (!cast<Instruction>(InstStr # "Zrri")
              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
              (X86pcmpm_imm $cc)), Narrow.KRC)>;

  // Compare AND-ed with a mask: folded into the write-masked instruction form.
  def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                             (Narrow.KVT (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
                                                      (Narrow.VT Narrow.RC:$src2),
                                                      cond)))),
            (COPY_TO_REGCLASS
             (!cast<Instruction>(InstStr # "Zrrik")
              (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
              (X86pcmpm_imm $cc)), Narrow.KRC)>;
}

// Same widening scheme as above, for compares whose other operand is a
// broadcast load. The commuted patterns (broadcast on the left-hand side) use
// X86pcmpm_imm_commute on the condition code instead of X86pcmpm_imm.
multiclass axv512_icmp_packed_cc_rmb_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
                                                     string InstStr,
                                                     X86VectorVTInfo Narrow,
                                                     X86VectorVTInfo Wide> {
  // Broadcast load.
  def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
                                  (Narrow.BroadcastLdFrag addr:$src2), cond)),
            (COPY_TO_REGCLASS
             (!cast<Instruction>(InstStr # "Zrmib")
              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
              addr:$src2, (X86pcmpm_imm $cc)), Narrow.KRC)>;

  def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                             (Narrow.KVT
                              (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
                                           (Narrow.BroadcastLdFrag addr:$src2),
                                           cond)))),
            (COPY_TO_REGCLASS
             (!cast<Instruction>(InstStr # "Zrmibk")
              (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
              addr:$src2, (X86pcmpm_imm $cc)), Narrow.KRC)>;

  // Commuted with broadcast load.
  def : Pat<(Narrow.KVT (Frag:$cc (Narrow.BroadcastLdFrag addr:$src2),
                                  (Narrow.VT Narrow.RC:$src1),
                                  cond)),
            (COPY_TO_REGCLASS
             (!cast<Instruction>(InstStr # "Zrmib")
              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
              addr:$src2, (X86pcmpm_imm_commute $cc)), Narrow.KRC)>;

  def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                             (Narrow.KVT
                              (Frag_su:$cc (Narrow.BroadcastLdFrag addr:$src2),
                                           (Narrow.VT Narrow.RC:$src1),
                                           cond)))),
            (COPY_TO_REGCLASS
             (!cast<Instruction>(InstStr # "Zrmibk")
              (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
              addr:$src2, (X86pcmpm_imm_commute $cc)), Narrow.KRC)>;
}

// Same as above, but for fp types which don't use PatFrags.
// The condition code is matched directly as timm:$cc; the commuted broadcast
// patterns pass it through X86cmpm_imm_commute.
multiclass axv512_cmp_packed_cc_no_vlx_lowering<string InstStr,
                                                X86VectorVTInfo Narrow,
                                                X86VectorVTInfo Wide> {
  // Register/register.
  def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
                                 (Narrow.VT Narrow.RC:$src2), timm:$cc)),
            (COPY_TO_REGCLASS
             (!cast<Instruction>(InstStr # "Zrri")
              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
              timm:$cc), Narrow.KRC)>;

  def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                             (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
                                         (Narrow.VT Narrow.RC:$src2), timm:$cc))),
            (COPY_TO_REGCLASS
             (!cast<Instruction>(InstStr # "Zrrik")
              (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
              timm:$cc), Narrow.KRC)>;

  // Broadcast load.
  def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
                                 (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc)),
            (COPY_TO_REGCLASS
             (!cast<Instruction>(InstStr # "Zrmbi")
              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
              addr:$src2, timm:$cc), Narrow.KRC)>;

  def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                             (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
                                         (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc))),
            (COPY_TO_REGCLASS
             (!cast<Instruction>(InstStr # "Zrmbik")
              (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
              addr:$src2, timm:$cc), Narrow.KRC)>;

  // Commuted with broadcast load.
  def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
                                 (Narrow.VT Narrow.RC:$src1), timm:$cc)),
            (COPY_TO_REGCLASS
             (!cast<Instruction>(InstStr # "Zrmbi")
              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
              addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;

  def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                             (X86cmpm_su (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
                                         (Narrow.VT Narrow.RC:$src1), timm:$cc))),
            (COPY_TO_REGCLASS
             (!cast<Instruction>(InstStr # "Zrmbik")
              (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
              addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
}

// 32/64-bit element compares (integer and fp) when VLX is not available.
let Predicates = [HasAVX512, NoVLX] in {
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;

  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>;

  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;

  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v8f32x_info, v16f32_info>;
  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v4f32x_info, v16f32_info>;
  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v4f64x_info, v8f64_info>;
  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v2f64x_info, v8f64_info>;
}

// 8/16-bit element integer compares when VLX is not available.
let Predicates = [HasBWI, NoVLX] in {
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v32i8x_info, v64i8_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v32i8x_info, v64i8_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v16i8x_info, v64i8_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v16i8x_info, v64i8_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v16i16x_info, v32i16_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v16i16x_info, v32i16_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v8i16x_info, v32i16_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v8i16x_info, v32i16_info>;
}

// Mask setting all 0s or 1s
//
// Defines a rematerializable, as-cheap-as-a-move pseudo instruction with no
// inputs that produces the constant mask `Val` of type VT in KRC.
multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, SDPatternOperator Val> {
  let Predicates = [HasAVX512],
      isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1,
      SchedRW = [WriteZero] in
    def NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
                  [(set KRC:$dst, (VT Val))]>;
}

// Instantiates the W/D/Q (VK16/VK32/VK64) variants of a mask-set pseudo.
multiclass avx512_mask_setop_w<SDPatternOperator Val> {
  defm W : avx512_mask_setop<VK16, v16i1, Val>;
  defm D : avx512_mask_setop<VK32, v32i1, Val>;
  defm Q : avx512_mask_setop<VK64, v64i1, Val>;
}

defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
defm KSET1 : avx512_mask_setop_w<immAllOnesV>;

// With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
let Predicates = [HasAVX512] in {
  // All-zeros / all-ones constants for masks of 8 bits or fewer are produced
  // with the 16-bit KSET pseudo and copied into the narrow class.
  def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
  def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>;
  def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>;
  def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>;
  def : Pat<(v8i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK8)>;
  def : Pat<(v4i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK4)>;
  def : Pat<(v2i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK2)>;
  def : Pat<(v1i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK1)>;
}

// Patterns for kmask insert_subvector/extract_subvector to/from index=0
// Both directions are selected as a plain register-class copy.
multiclass operation_subvector_mask_lowering<RegisterClass subRC, ValueType subVT,
                                             RegisterClass RC, ValueType VT> {
  def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
            (subVT (COPY_TO_REGCLASS RC:$src, subRC))>;

  def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
            (VT (COPY_TO_REGCLASS subRC:$src, RC))>;
}
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK2,  v2i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK4,  v4i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK8,  v8i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK4,  v4i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK8,  v8i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK8,  v8i1>;
defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK16, v16i1, VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>;

//===----------------------------------------------------------------------===//
// AVX-512 - Aligned and unaligned load and store
//

// Defines the load-side forms of a vector move for one vector type `_`:
//   rr / rrk / rrkz  - register source (plain, merge-masked, zero-masked)
//   rm / rmk / rmkz  - memory source   (plain, merge-masked, zero-masked)
// plus the `mload` patterns that select rmkz / rmk.
// `NoRMPattern` drops the selection pattern from `rm`; `SelectOprr` is the
// select operator used by the register-source masked forms.
multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name,
                       X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload,
                       X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
                       bit NoRMPattern = 0,
                       SDPatternOperator SelectOprr = vselect> {
  let hasSideEffects = 0 in {
  // isMoveReg is set on the plain register move only.
  let isMoveReg = 1 in
  def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),
                    OpcodeStr # "\t{$src, $dst|$dst, $src}", [],
                    _.ExeDomain>, EVEX, Sched<[Sched.RR]>,
                    EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
  def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
                      (ins _.KRCWM:$mask, _.RC:$src),
                      OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" #
                                  "${dst} {${mask}} {z}, $src}",
                      [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
                                              (_.VT _.RC:$src),
                                              _.ImmAllZerosV)))], _.ExeDomain>,
                      EVEX, EVEX_KZ, Sched<[Sched.RR]>;

  let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1 in
  def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
                    OpcodeStr # "\t{$src, $dst|$dst, $src}",
                    !if(NoRMPattern, [],
                        [(set _.RC:$dst,
                          (_.VT (ld_frag addr:$src)))]),
                    _.ExeDomain>, EVEX, Sched<[Sched.RM]>,
                    EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;

  // Merge-masked forms: $src0 supplies the pass-through value and is tied
  // to $dst.
  let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in {
    def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
                       (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1),
                       OpcodeStr # "\t{$src1, ${dst} {${mask}}|" #
                                   "${dst} {${mask}}, $src1}",
                       [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
                                               (_.VT _.RC:$src1),
                                               (_.VT _.RC:$src0))))], _.ExeDomain>,
                       EVEX, EVEX_K, Sched<[Sched.RR]>;
    def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
                       (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1),
                       OpcodeStr # "\t{$src1, ${dst} {${mask}}|" #
                                   "${dst} {${mask}}, $src1}",
                       [(set _.RC:$dst, (_.VT
                           (vselect_mask _.KRCWM:$mask,
                            (_.VT (ld_frag addr:$src1)),
                            (_.VT _.RC:$src0))))], _.ExeDomain>,
                       EVEX, EVEX_K, Sched<[Sched.RM]>;
  }
  def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
                      (ins _.KRCWM:$mask, _.MemOp:$src),
                      OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"#
                                 "${dst} {${mask}} {z}, $src}",
                      [(set _.RC:$dst, (_.VT (vselect_mask _.KRCWM:$mask,
                        (_.VT (ld_frag addr:$src)), _.ImmAllZerosV)))],
                      _.ExeDomain>, EVEX, EVEX_KZ, Sched<[Sched.RM]>;
  }

  // Masked loads: an undef or all-zeros pass-through selects the zero-masked
  // form, any other pass-through selects the merge-masked form.
  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
            (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>;

  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>;

  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))),
            (!cast<Instruction>(Name#_.ZSuffix#rmk) _.RC:$src0,
             _.KRCWM:$mask, addr:$ptr)>;
}

// Aligned-load instantiation for all three vector lengths. The 512-bit form
// only needs `prd`; the 256/128-bit forms additionally require HasVLX.
multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
                                 AVX512VLVectorVTInfo _, Predicate prd,
                                 X86SchedWriteMoveLSWidths Sched,
                                 string EVEX2VEXOvrd, bit NoRMPattern = 0> {
  let Predicates = [prd] in
  defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512,
                       _.info512.AlignedLdFrag, masked_load_aligned,
                       Sched.ZMM, "", NoRMPattern>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
  defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256,
                          _.info256.AlignedLdFrag, masked_load_aligned,
                          Sched.YMM, EVEX2VEXOvrd#"Y", NoRMPattern>, EVEX_V256;
  defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128,
                          _.info128.AlignedLdFrag, masked_load_aligned,
                          Sched.XMM, EVEX2VEXOvrd, NoRMPattern>, EVEX_V128;
  }
}

// Unaligned-load instantiation for all three vector lengths; predicates are
// applied as in avx512_alignedload_vl. `SelectOprr` is forwarded to
// avx512_load.
multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
                          AVX512VLVectorVTInfo _, Predicate prd,
                          X86SchedWriteMoveLSWidths Sched,
                          string EVEX2VEXOvrd, bit NoRMPattern = 0,
                          SDPatternOperator SelectOprr = vselect> {
  let Predicates = [prd] in
  defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512, _.info512.LdFrag,
                       masked_load, Sched.ZMM, "",
                       NoRMPattern, SelectOprr>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
  defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256, _.info256.LdFrag,
                          masked_load, Sched.YMM, EVEX2VEXOvrd#"Y",
                          NoRMPattern, SelectOprr>, EVEX_V256;
  defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128, _.info128.LdFrag,
                          masked_load, Sched.XMM, EVEX2VEXOvrd,
                          NoRMPattern, SelectOprr>, EVEX_V128;
  }
}

// Defines the store-side forms of a vector move for one vector type `_`:
//   rr_REV / rrk_REV / rrkz_REV - pattern-less register forms using the
//                                 MRMDestReg encoding (codegen-only, but
//                                 forced into the disassembler)
//   mr / mrk                    - plain and masked stores
// plus the `mstore` pattern selecting mrk and the ".s" assembler aliases for
// the _REV forms. `NoMRPattern` drops the selection pattern from `mr`.
multiclass avx512_store<bits<8> opc, string OpcodeStr, string BaseName,
                        X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore,
                        X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
                        bit NoMRPattern = 0> {
  let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
  let isMoveReg = 1 in
  def rr_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
                        OpcodeStr # "\t{$src, $dst|$dst, $src}",
                        [], _.ExeDomain>, EVEX,
                        FoldGenData<BaseName#_.ZSuffix#rr>, Sched<[Sched.RR]>,
                        EVEX2VEXOverride<EVEX2VEXOvrd#"rr_REV">;
  def rrk_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
                         (ins _.KRCWM:$mask, _.RC:$src),
                         OpcodeStr # "\t{$src, ${dst} {${mask}}|"#
                                     "${dst} {${mask}}, $src}",
                         [], _.ExeDomain>, EVEX, EVEX_K,
                         FoldGenData<BaseName#_.ZSuffix#rrk>,
                         Sched<[Sched.RR]>;
  def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
                          (ins _.KRCWM:$mask, _.RC:$src),
                          OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" #
                                      "${dst} {${mask}} {z}, $src}",
                          [], _.ExeDomain>, EVEX, EVEX_KZ,
                          FoldGenData<BaseName#_.ZSuffix#rrkz>,
                          Sched<[Sched.RR]>;
  }

  // NOTE(review): this brace-less `let` covers only `mr`; `mrk` below is
  // outside its scope - confirm that is intended.
  let hasSideEffects = 0, mayStore = 1 in
  def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
                    OpcodeStr # "\t{$src, $dst|$dst, $src}",
                    !if(NoMRPattern, [],
                        [(st_frag (_.VT _.RC:$src), addr:$dst)]),
                    _.ExeDomain>, EVEX, Sched<[Sched.MR]>,
                    EVEX2VEXOverride<EVEX2VEXOvrd#"mr">;
  def mrk : AVX512PI<opc, MRMDestMem, (outs),
                     (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
                     OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
                     [], _.ExeDomain>, EVEX, EVEX_K, Sched<[Sched.MR]>,
                     NotMemoryFoldable;

  def: Pat<(mstore (_.VT _.RC:$src), addr:$ptr, _.KRCWM:$mask),
           (!cast<Instruction>(BaseName#_.ZSuffix#mrk) addr:$ptr,
                                                        _.KRCWM:$mask, _.RC:$src)>;

  def : InstAlias<OpcodeStr#".s\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(BaseName#_.ZSuffix#"rr_REV")
                   _.RC:$dst, _.RC:$src), 0>;
  def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
                  (!cast<Instruction>(BaseName#_.ZSuffix#"rrk_REV")
                   _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
  def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}",
                  (!cast<Instruction>(BaseName#_.ZSuffix#"rrkz_REV")
                   _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
}

// Unaligned-store instantiation for all three vector lengths. The 512-bit
// form only needs `prd`; the 256/128-bit forms additionally require HasVLX.
multiclass avx512_store_vl< bits<8> opc, string OpcodeStr,
                            AVX512VLVectorVTInfo _, Predicate prd,
                            X86SchedWriteMoveLSWidths Sched,
                            string EVEX2VEXOvrd, bit NoMRPattern = 0> {
  let Predicates = [prd] in
  defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, store,
                        masked_store, Sched.ZMM, "",
                        NoMRPattern>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, store,
                             masked_store, Sched.YMM,
                             EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
    defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, store,
                             masked_store, Sched.XMM, EVEX2VEXOvrd,
                             NoMRPattern>, EVEX_V128;
  }
}

// Aligned-store instantiation for all three vector lengths; predicates are
// applied as in avx512_store_vl.
multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _, Predicate prd,
                                  X86SchedWriteMoveLSWidths Sched,
                                  string EVEX2VEXOvrd, bit NoMRPattern = 0> {
  let Predicates = [prd] in
  defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, alignedstore,
                        masked_store_aligned, Sched.ZMM, "",
                        NoMRPattern>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, alignedstore,
                             masked_store_aligned, Sched.YMM,
                             EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
    defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, alignedstore,
                             masked_store_aligned, Sched.XMM, EVEX2VEXOvrd,
                             NoMRPattern>, EVEX_V128;
  }
}

defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info,
                                     HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
               avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info,
                                      HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
               PS, EVEX_CD8<32, CD8VF>;

defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info,
                                     HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
               avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info,
                                      HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
               PD, VEX_W, EVEX_CD8<64, CD8VF>;

defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512,
                              SchedWriteFMoveLS, "VMOVUPS", 0, null_frag>,
               avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512,
                               SchedWriteFMoveLS, "VMOVUPS">,
               PS, EVEX_CD8<32, CD8VF>;

defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512,
                              SchedWriteFMoveLS, "VMOVUPD", 0, null_frag>,
               avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512,
                               SchedWriteFMoveLS, "VMOVUPD">,
               PD, VEX_W, EVEX_CD8<64, CD8VF>;

// The 32-bit-element integer moves pass 1 for NoRMPattern / NoMRPattern, so
// their plain rm / mr forms carry no selection pattern.
defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info,
                                       HasAVX512, SchedWriteVecMoveLS,
                                       "VMOVDQA", 1>,
                 avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info,
                                        HasAVX512, SchedWriteVecMoveLS,
                                        "VMOVDQA", 1>,
                 PD, EVEX_CD8<32, CD8VF>;

defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info,
                                       HasAVX512, SchedWriteVecMoveLS,
                                       "VMOVDQA">,
                 avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info,
                                        HasAVX512, SchedWriteVecMoveLS,
                                        "VMOVDQA">,
                 PD, VEX_W, EVEX_CD8<64, CD8VF>;

// The byte and word moves require HasBWI and likewise omit the rm / mr
// selection patterns.
defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI,
                               SchedWriteVecMoveLS, "VMOVDQU", 1>,
                avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, HasBWI,
                                SchedWriteVecMoveLS, "VMOVDQU", 1>,
                XD, EVEX_CD8<8, CD8VF>;

defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI,
                                SchedWriteVecMoveLS, "VMOVDQU", 1>,
                 avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, HasBWI,
                                 SchedWriteVecMoveLS, "VMOVDQU", 1>,
                 XD, VEX_W, EVEX_CD8<16, CD8VF>;

defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
                                SchedWriteVecMoveLS, "VMOVDQU", 1, null_frag>,
                 avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
                                 SchedWriteVecMoveLS, "VMOVDQU", 1>,
                 XS, EVEX_CD8<32, CD8VF>;

defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
                                SchedWriteVecMoveLS, "VMOVDQU", 0, null_frag>,
                 avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
                                 SchedWriteVecMoveLS, "VMOVDQU">,
                 XS, VEX_W, EVEX_CD8<64, CD8VF>;

// Special instructions to help with spilling when we don't have VLX. We need
// to load or store from a ZMM register instead. These are converted in
// expandPostRAPseudos.
//
// Load pseudos (128- and 256-bit, aligned and unaligned).
let isReMaterializable = 1, canFoldAsLoad = 1,
    isPseudo = 1, mayLoad = 1, hasSideEffects = 0 in {
def VMOVAPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
                            "", []>, Sched<[WriteFLoadX]>;
def VMOVAPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
                            "", []>, Sched<[WriteFLoadY]>;
def VMOVUPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
                            "", []>, Sched<[WriteFLoadX]>;
def VMOVUPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
                            "", []>, Sched<[WriteFLoadY]>;
}

// Store pseudos (128- and 256-bit, aligned and unaligned).
let isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
def VMOVAPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
                            "", []>, Sched<[WriteFStoreX]>;
def VMOVAPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
                            "", []>, Sched<[WriteFStoreY]>;
def VMOVUPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
                            "", []>, Sched<[WriteFStoreX]>;
def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
                            "", []>, Sched<[WriteFStoreY]>;
}

// A select whose "true" operand is all-zeros is done as a zero-masked move of
// the "false" operand under the inverted mask. The v8i1 mask is inverted via
// the 16-bit KNOTW.
def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 immAllZerosV),
                          (v8i64 VR512:$src))),
          (VMOVDQA64Zrrkz
           (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
                             VK8),
           VR512:$src)>;

def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
                           (v16i32 VR512:$src))),
          (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;

// These patterns exist to prevent the above patterns from introducing a second
// mask inversion when one already exists.
// (vselect (vnot mask), zero, src): the inversion is already present in the
// DAG, so the un-inverted mask is used directly as the zeroing write-mask.
def : Pat<(v8i64 (vselect (v8i1 (vnot VK8:$mask)),
                          (v8i64 immAllZerosV),
                          (v8i64 VR512:$src))),
                 (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>;
def : Pat<(v16i32 (vselect (v16i1 (vnot VK16:$mask)),
                           (v16i32 immAllZerosV),
                           (v16i32 VR512:$src))),
                  (VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;

// Lower a masked select on a Narrow vector type through the Wide form of a
// masked register move.
//   InstrStr - instruction name prefix; "rrk" / "rrkz" is appended.
//   Narrow   - type info of the vector being selected.
//   Wide     - type info of the instruction actually emitted.
// The narrow operands are placed in the low subregister (Narrow.SubRegIdx) of
// an IMPLICIT_DEF wide register, the mask is copied to the wide write-mask
// class, and the narrow result is extracted from the same subregister.
multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow,
                              X86VectorVTInfo Wide> {
  // Merge-masking: elements with a clear mask bit come from $src0.
  def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
                                Narrow.RC:$src1, Narrow.RC:$src0)),
            (EXTRACT_SUBREG
             (Wide.VT
              (!cast<Instruction>(InstrStr#"rrk")
               (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src0, Narrow.SubRegIdx)),
               (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
               (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
             Narrow.SubRegIdx)>;

  // Zero-masking: elements with a clear mask bit become zero.
  def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
                                Narrow.RC:$src1, Narrow.ImmAllZerosV)),
            (EXTRACT_SUBREG
             (Wide.VT
              (!cast<Instruction>(InstrStr#"rrkz")
               (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
               (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
             Narrow.SubRegIdx)>;
}

// Patterns for handling masked selects of 128-bit and 256-bit vectors when
// VLX isn't available. Use a 512-bit operation and extract.
let Predicates = [HasAVX512, NoVLX] in {
  // 32-bit element types, widened to the 16-element 512-bit form.
  defm : mask_move_lowering<"VMOVAPSZ", v4f32x_info, v16f32_info>;
  defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>;
  defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>;
  defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>;

  // 64-bit element types, widened to the 8-element 512-bit form.
  defm : mask_move_lowering<"VMOVAPDZ", v2f64x_info, v8f64_info>;
  defm : mask_move_lowering<"VMOVDQA64Z", v2i64x_info, v8i64_info>;
  defm : mask_move_lowering<"VMOVAPDZ", v4f64x_info, v8f64_info>;
  defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>;
}

// Byte and word element selects additionally need BWI for the masked
// VMOVDQU8/VMOVDQU16 instructions.
let Predicates = [HasBWI, NoVLX] in {
  defm : mask_move_lowering<"VMOVDQU8Z", v16i8x_info, v64i8_info>;
  defm : mask_move_lowering<"VMOVDQU8Z", v32i8x_info, v64i8_info>;

  defm : mask_move_lowering<"VMOVDQU16Z", v8i16x_info, v32i16_info>;
  defm : mask_move_lowering<"VMOVDQU16Z", v16i16x_info, v32i16_info>;
}

// Unmasked loads and stores of the v16i32, v32i16 and v64i8 types all select
// the 64-bit-element VMOVDQA64 / VMOVDQU64 instructions.
let Predicates = [HasAVX512] in {
  // 512-bit load.
  def : Pat<(alignedloadv16i32 addr:$src),
            (VMOVDQA64Zrm addr:$src)>;
  def : Pat<(alignedloadv32i16 addr:$src),
            (VMOVDQA64Zrm addr:$src)>;
  def : Pat<(alignedloadv64i8 addr:$src),
            (VMOVDQA64Zrm addr:$src)>;
  def : Pat<(loadv16i32 addr:$src),
            (VMOVDQU64Zrm addr:$src)>;
  def : Pat<(loadv32i16 addr:$src),
            (VMOVDQU64Zrm addr:$src)>;
  def : Pat<(loadv64i8 addr:$src),
            (VMOVDQU64Zrm addr:$src)>;

  // 512-bit store.
  def : Pat<(alignedstore (v16i32 VR512:$src), addr:$dst),
            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst),
            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst),
            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v16i32 VR512:$src), addr:$dst),
            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v32i16 VR512:$src), addr:$dst),
            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v64i8 VR512:$src), addr:$dst),
            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
}

// Same mapping for the 128-bit and 256-bit integer vector types, which
// require VLX.
let Predicates = [HasVLX] in {
  // 128-bit load.
  def : Pat<(alignedloadv4i32 addr:$src),
            (VMOVDQA64Z128rm addr:$src)>;
  def : Pat<(alignedloadv8i16 addr:$src),
            (VMOVDQA64Z128rm addr:$src)>;
  def : Pat<(alignedloadv16i8 addr:$src),
            (VMOVDQA64Z128rm addr:$src)>;
  def : Pat<(loadv4i32 addr:$src),
            (VMOVDQU64Z128rm addr:$src)>;
  def : Pat<(loadv8i16 addr:$src),
            (VMOVDQU64Z128rm addr:$src)>;
  def : Pat<(loadv16i8 addr:$src),
            (VMOVDQU64Z128rm addr:$src)>;

  // 128-bit store.
  def : Pat<(alignedstore (v4i32 VR128X:$src), addr:$dst),
            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst),
            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst),
            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v4i32 VR128X:$src), addr:$dst),
            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v8i16 VR128X:$src), addr:$dst),
            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v16i8 VR128X:$src), addr:$dst),
            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;

  // 256-bit load.
  def : Pat<(alignedloadv8i32 addr:$src),
            (VMOVDQA64Z256rm addr:$src)>;
  def : Pat<(alignedloadv16i16 addr:$src),
            (VMOVDQA64Z256rm addr:$src)>;
  def : Pat<(alignedloadv32i8 addr:$src),
            (VMOVDQA64Z256rm addr:$src)>;
  def : Pat<(loadv8i32 addr:$src),
            (VMOVDQU64Z256rm addr:$src)>;
  def : Pat<(loadv16i16 addr:$src),
            (VMOVDQU64Z256rm addr:$src)>;
  def : Pat<(loadv32i8 addr:$src),
            (VMOVDQU64Z256rm addr:$src)>;

  // 256-bit store.
  def : Pat<(alignedstore (v8i32 VR256X:$src), addr:$dst),
            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst),
            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst),
            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v8i32 VR256X:$src), addr:$dst),
            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v16i16 VR256X:$src), addr:$dst),
            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v32i8 VR256X:$src), addr:$dst),
            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
}

// Half-precision vectors reuse existing move instructions:
//   - unmasked loads/stores select VMOVAPS (aligned) or VMOVUPS (unaligned);
//   - masked selects, loads and stores select VMOVDQU16, whose 16-bit masking
//     granularity matches the f16 element size. Masked *aligned* loads also
//     select the unaligned VMOVDQU16 form.
// A masked_load with an undef pass-through selects the zero-masking form.
let Predicates = [HasFP16] in {
  def : Pat<(v32f16 (vselect VK32WM:$mask, (v32f16 VR512:$src1), (v32f16 VR512:$src0))),
            (VMOVDQU16Zrrk VR512:$src0, VK32WM:$mask, VR512:$src1)>;
  def : Pat<(v32f16 (vselect VK32WM:$mask, (v32f16 VR512:$src1), v32f16_info.ImmAllZerosV)),
            (VMOVDQU16Zrrkz VK32WM:$mask, VR512:$src1)>;
  def : Pat<(v32f16 (alignedloadv32f16 addr:$src)),
            (VMOVAPSZrm addr:$src)>;
  def : Pat<(v32f16 (vselect VK32WM:$mask,
                     (v32f16 (alignedloadv32f16 addr:$src)), (v32f16 VR512:$src0))),
            (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
  def : Pat<(v32f16 (vselect VK32WM:$mask,
                     (v32f16 (alignedloadv32f16 addr:$src)), v32f16_info.ImmAllZerosV)),
            (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
  def : Pat<(v32f16 (loadv32f16 addr:$src)),
            (VMOVUPSZrm addr:$src)>;
  def : Pat<(v32f16 (vselect VK32WM:$mask,
                     (v32f16 (loadv32f16 addr:$src)), (v32f16 VR512:$src0))),
            (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
  def : Pat<(v32f16 (vselect VK32WM:$mask,
                     (v32f16 (loadv32f16 addr:$src)), v32f16_info.ImmAllZerosV)),
            (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
  def : Pat<(v32f16 (masked_load addr:$src, VK32WM:$mask, (v32f16 VR512:$src0))),
            (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
  def : Pat<(v32f16 (masked_load addr:$src, VK32WM:$mask, undef)),
            (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
  def : Pat<(v32f16 (masked_load addr:$src, VK32WM:$mask, v32f16_info.ImmAllZerosV)),
            (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;

  def : Pat<(alignedstore (v32f16 VR512:$src), addr:$dst),
            (VMOVAPSZmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v32f16 VR512:$src), addr:$dst),
            (VMOVUPSZmr addr:$dst, VR512:$src)>;
  def : Pat<(masked_store (v32f16 VR512:$src), addr:$dst, VK32WM:$mask),
            (VMOVDQU16Zmrk addr:$dst, VK32WM:$mask, VR512:$src)>;
}
// 256-bit (v16f16) and 128-bit (v8f16) versions of the FP16 patterns above.
let Predicates = [HasFP16, HasVLX] in {
  def : Pat<(v16f16 (vselect VK16WM:$mask, (v16f16 VR256X:$src1), (v16f16 VR256X:$src0))),
            (VMOVDQU16Z256rrk VR256X:$src0, VK16WM:$mask, VR256X:$src1)>;
  def : Pat<(v16f16 (vselect VK16WM:$mask, (v16f16 VR256X:$src1), v16f16x_info.ImmAllZerosV)),
            (VMOVDQU16Z256rrkz VK16WM:$mask, VR256X:$src1)>;
  def : Pat<(v16f16 (alignedloadv16f16 addr:$src)),
            (VMOVAPSZ256rm addr:$src)>;
  def : Pat<(v16f16 (vselect VK16WM:$mask,
                     (v16f16 (alignedloadv16f16 addr:$src)), (v16f16 VR256X:$src0))),
            (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
  def : Pat<(v16f16 (vselect VK16WM:$mask,
                     (v16f16 (alignedloadv16f16 addr:$src)), v16f16x_info.ImmAllZerosV)),
            (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
  def : Pat<(v16f16 (loadv16f16 addr:$src)),
            (VMOVUPSZ256rm addr:$src)>;
  def : Pat<(v16f16 (vselect VK16WM:$mask,
                     (v16f16 (loadv16f16 addr:$src)), (v16f16 VR256X:$src0))),
            (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
  def : Pat<(v16f16 (vselect VK16WM:$mask,
                     (v16f16 (loadv16f16 addr:$src)), v16f16x_info.ImmAllZerosV)),
            (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
  def : Pat<(v16f16 (masked_load addr:$src, VK16WM:$mask, (v16f16 VR256X:$src0))),
            (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
  def : Pat<(v16f16 (masked_load addr:$src, VK16WM:$mask, undef)),
            (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
  def : Pat<(v16f16 (masked_load addr:$src, VK16WM:$mask, v16f16x_info.ImmAllZerosV)),
            (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;

  def : Pat<(alignedstore (v16f16 VR256X:$src), addr:$dst),
            (VMOVAPSZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v16f16 VR256X:$src), addr:$dst),
            (VMOVUPSZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(masked_store (v16f16 VR256X:$src), addr:$dst, VK16WM:$mask),
            (VMOVDQU16Z256mrk addr:$dst, VK16WM:$mask, VR256X:$src)>;

  def : Pat<(v8f16 (vselect VK8WM:$mask, (v8f16 VR128X:$src1), (v8f16 VR128X:$src0))),
            (VMOVDQU16Z128rrk VR128X:$src0, VK8WM:$mask, VR128X:$src1)>;
  def : Pat<(v8f16 (vselect VK8WM:$mask, (v8f16 VR128X:$src1), v8f16x_info.ImmAllZerosV)),
            (VMOVDQU16Z128rrkz VK8WM:$mask, VR128X:$src1)>;
  def : Pat<(v8f16 (alignedloadv8f16 addr:$src)),
            (VMOVAPSZ128rm addr:$src)>;
  def : Pat<(v8f16 (vselect VK8WM:$mask,
                     (v8f16 (alignedloadv8f16 addr:$src)), (v8f16 VR128X:$src0))),
            (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
  def : Pat<(v8f16 (vselect VK8WM:$mask,
                     (v8f16 (alignedloadv8f16 addr:$src)), v8f16x_info.ImmAllZerosV)),
            (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
  def : Pat<(v8f16 (loadv8f16 addr:$src)),
            (VMOVUPSZ128rm addr:$src)>;
  def : Pat<(v8f16 (vselect VK8WM:$mask,
                     (v8f16 (loadv8f16 addr:$src)), (v8f16 VR128X:$src0))),
            (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
  def : Pat<(v8f16 (vselect VK8WM:$mask,
                     (v8f16 (loadv8f16 addr:$src)), v8f16x_info.ImmAllZerosV)),
            (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
  def : Pat<(v8f16 (masked_load addr:$src, VK8WM:$mask, (v8f16 VR128X:$src0))),
            (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
  def : Pat<(v8f16 (masked_load addr:$src, VK8WM:$mask, undef)),
            (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
  def : Pat<(v8f16 (masked_load addr:$src, VK8WM:$mask, v8f16x_info.ImmAllZerosV)),
            (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;

  def : Pat<(alignedstore (v8f16 VR128X:$src), addr:$dst),
            (VMOVAPSZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v8f16 VR128X:$src), addr:$dst),
            (VMOVUPSZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(masked_store (v8f16 VR128X:$src), addr:$dst, VK8WM:$mask),
            (VMOVDQU16Z128mrk addr:$dst, VK8WM:$mask, VR128X:$src)>;
}

// Move Int Doubleword to Packed Double Int
//
// GPR or memory -> XMM (opcode 0x6E), plus the codegen-only FR64X <-> GR64
// bitconvert forms.
let ExeDomain = SSEPackedInt in {
def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(set VR128X:$dst,
                        (v4i32 (scalar_to_vector GR32:$src)))]>,
                        EVEX, Sched<[WriteVecMoveFromGpr]>;
def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(set VR128X:$dst,
                        (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
                      EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>;
def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                        [(set VR128X:$dst,
                          (v2i64 (scalar_to_vector GR64:$src)))]>,
                      EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
// Pattern-less memory form, kept so the disassembler can decode it.
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
                      (ins i64mem:$src),
                      "vmovq\t{$src, $dst|$dst, $src}", []>,
                      EVEX, VEX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecLoad]>;
let isCodeGenOnly = 1 in {
def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src),
                       "vmovq\t{$src, $dst|$dst, $src}",
                       [(set FR64X:$dst, (bitconvert GR64:$src))]>,
                       EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
// This is a vector-register -> GPR move (opcode 0x7E, GR64 destination), so
// it is scheduled as WriteVecMoveToGpr like the other 0x7E register forms
// (VMOVPQIto64Zrr, VMOVSS2DIZrr).
def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src),
                         "vmovq\t{$src, $dst|$dst, $src}",
                         [(set GR64:$dst, (bitconvert FR64X:$src))]>,
                         EVEX, VEX_W, Sched<[WriteVecMoveToGpr]>;
}
} // ExeDomain = SSEPackedInt

// Move Int Doubleword to Single Scalar
//
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
def VMOVDI2SSZrr  : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(set FR32X:$dst, (bitconvert GR32:$src))]>,
                      EVEX, Sched<[WriteVecMoveFromGpr]>;
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1

// Move doubleword from xmm register to r/m32
//
let ExeDomain = SSEPackedInt in {
def VMOVPDI2DIZrr  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
                       "vmovd\t{$src, $dst|$dst, $src}",
                       [(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
                                        (iPTR 0)))]>,
                       EVEX, Sched<[WriteVecMoveToGpr]>;
def VMOVPDI2DIZmr  : AVX512BI<0x7E, MRMDestMem, (outs),
                       (ins i32mem:$dst, VR128X:$src),
                       "vmovd\t{$src, $dst|$dst, $src}",
                       [(store (i32 (extractelt (v4i32 VR128X:$src),
                                     (iPTR 0))), addr:$dst)]>,
                       EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>;
} // ExeDomain = SSEPackedInt

// Move quadword from xmm1 register to r/m64
//
let ExeDomain = SSEPackedInt in {
def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
                                                   (iPTR 0)))]>,
                      PD, EVEX, VEX_W, Sched<[WriteVecMoveToGpr]>,
                      Requires<[HasAVX512]>;

// Pattern-less 0x7E store form, kept so the disassembler can decode it. It
// carries the same 64-bit Tuple1-scalar compressed-displacement info as the
// other m64 forms here (VMOV64toPQIZrm, VMOVPQI2QIZmr) so that a disp8 is
// scaled consistently.
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src),
                    "vmovq\t{$src, $dst|$dst, $src}", []>, PD,
                    EVEX, VEX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecStore]>,
                    Requires<[HasAVX512, In64BitMode]>;

def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
                      (ins i64mem:$dst, VR128X:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
                              addr:$dst)]>,
                      EVEX, PD, VEX_W, EVEX_CD8<64, CD8VT1>,
                      Sched<[WriteVecStore]>, Requires<[HasAVX512]>;

// Register-register 0xD6 form; only reachable through the "vmovq.s" alias
// below and the disassembler.
let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
                             (ins VR128X:$src),
                             "vmovq\t{$src, $dst|$dst, $src}", []>,
                             EVEX, VEX_W, Sched<[SchedWriteVecLogic.XMM]>;
} // ExeDomain = SSEPackedInt

def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}",
                (VMOVPQI2QIZrr VR128X:$dst, VR128X:$src), 0>;

let Predicates = [HasAVX512] in {
  def : Pat<(X86vextractstore64 (v2i64 VR128X:$src), addr:$dst),
            (VMOVPQI2QIZmr addr:$dst, VR128X:$src)>;
}

// Move Scalar Single to Double Int
//
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
def VMOVSS2DIZrr  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
                      (ins FR32X:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(set GR32:$dst, (bitconvert FR32X:$src))]>,
                      EVEX, Sched<[WriteVecMoveToGpr]>;
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1

// Move Quadword Int to Packed Quadword Int
//
let ExeDomain = SSEPackedInt in {
def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
                      (ins i64mem:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(set VR128X:$dst,
                        (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
                      EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>;
} // ExeDomain = SSEPackedInt

// Allow "vmovd" but print "vmovq".
def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
                (VMOV64toPQIZrr VR128X:$dst, GR64:$src), 0>;
def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
                (VMOVPQIto64Zrr GR64:$dst, VR128X:$src), 0>;

// Conversions between masks and scalar fp.
// Each direction goes through a GPR: KMOV to or from the mask register
// combined with the scalar FP <-> GPR bitconvert move.
def : Pat<(v32i1 (bitconvert FR32X:$src)),
          (KMOVDkr (VMOVSS2DIZrr FR32X:$src))>;
def : Pat<(f32 (bitconvert VK32:$src)),
          (VMOVDI2SSZrr (KMOVDrk VK32:$src))>;

def : Pat<(v64i1 (bitconvert FR64X:$src)),
          (KMOVQkr (VMOVSDto64Zrr FR64X:$src))>;
def : Pat<(f64 (bitconvert VK64:$src)),
          (VMOV64toSDZrr (KMOVQrk VK64:$src))>;

//===----------------------------------------------------------------------===//
// AVX-512  MOVSH, MOVSS, MOVSD
//===----------------------------------------------------------------------===//

// Defines the scalar move family for one element type:
//   rr / rrk / rrkz  - register forms (opcode 0x10), unmasked, merge-masked
//                      and zero-masked;
//   rm / rmk / rmkz  - load forms (opcode 0x10); rm_alt is a codegen-only
//                      load into the scalar FP register class;
//   mr / mrk         - store forms (opcode 0x11).
// Parameters:
//   asm         - mnemonic.
//   OpNode      - node matched by the register forms.
//   vzload_frag - load fragment matched by rm.
//   _           - type info for the scalar type.
//   prd         - predicates applied to the unmasked rr form only (the
//                 brace-less `let` covers just the next def).
multiclass avx512_move_scalar<string asm, SDNode OpNode, PatFrag vzload_frag,
                              X86VectorVTInfo _,
                              list<Predicate> prd = [HasAVX512, OptForSize]> {
  let Predicates = prd in
  def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
             (ins _.RC:$src1, _.RC:$src2),
             !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2)))],
             _.ExeDomain>, EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
  def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
              (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
              !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|",
              "$dst {${mask}} {z}, $src1, $src2}"),
              [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
                                      (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                                      _.ImmAllZerosV)))],
              _.ExeDomain>, EVEX_4V, EVEX_KZ, Sched<[SchedWriteFShuffle.XMM]>;
  // Merge-masking: $src0 is the pass-through value and is tied to $dst.
  let Constraints = "$src0 = $dst"  in
  def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
             (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
             !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|",
             "$dst {${mask}}, $src1, $src2}"),
             [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
                                     (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                                     (_.VT _.RC:$src0))))],
             _.ExeDomain>, EVEX_4V, EVEX_K, Sched<[SchedWriteFShuffle.XMM]>;
  let canFoldAsLoad = 1, isReMaterializable = 1 in {
  def rm : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst), (ins _.ScalarMemOp:$src),
             !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
             [(set _.RC:$dst, (_.VT (vzload_frag addr:$src)))],
             _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
  // _alt version uses the scalar FP register class (_.FRC).
  let isCodeGenOnly = 1 in
  def rm_alt : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
                 !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
                 [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
                 _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
  }
  // Masked loads have no pattern here; they are selected through separate
  // Pat records.
  let mayLoad = 1, hasSideEffects = 0 in {
    let Constraints = "$src0 = $dst" in
    def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
               (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src),
               !strconcat(asm, "\t{$src, $dst {${mask}}|",
               "$dst {${mask}}, $src}"),
               [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFLoad]>;
    def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
               (ins _.KRCWM:$mask, _.ScalarMemOp:$src),
               !strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
               "$dst {${mask}} {z}, $src}"),
               [], _.ExeDomain>, EVEX, EVEX_KZ, Sched<[WriteFLoad]>;
  }
  def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src),
             !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
             [(store _.FRC:$src, addr:$dst)], _.ExeDomain>,
             EVEX, Sched<[WriteFStore]>;
  let mayStore = 1, hasSideEffects = 0 in
  def mrk: AVX512PI<0x11, MRMDestMem, (outs),
              (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.RC:$src),
              !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
              [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFStore]>,
              NotMemoryFoldable;
}

defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, X86vzload32, f32x_info>,
                                  VEX_LIG, XS, EVEX_CD8<32, CD8VT1>;

defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, X86vzload64, f64x_info>,
                                  VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>;

defm VMOVSHZ : avx512_move_scalar<"vmovsh", X86Movsh, X86vzload16, f16x_info,
                                  [HasFP16]>,
                                  VEX_LIG, T_MAP5XS, EVEX_CD8<16, CD8VT1>;

// Fold a scalar select feeding a scalar move into the masked register form:
//   OpNode($src0, scalar_to_vector(X86selects(mask, $src1, $src2)))
//     -> rrk  with $src2 as the pass-through operand,
//   and with a zero false value (ZeroFP)
//     -> rrkz.
multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
                                       PatLeaf ZeroFP, X86VectorVTInfo _> {

def : Pat<(_.VT (OpNode _.RC:$src0,
                        (_.VT (scalar_to_vector
                                  (_.EltVT (X86selects VK1WM:$mask,
                                                       (_.EltVT _.FRC:$src1),
                                                       (_.EltVT _.FRC:$src2))))))),
          (!cast<Instruction>(InstrStr#rrk)
                        (_.VT (COPY_TO_REGCLASS _.FRC:$src2, _.RC)),
                        VK1WM:$mask,
                        (_.VT _.RC:$src0),
                        (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;

def : Pat<(_.VT (OpNode _.RC:$src0,
                        (_.VT (scalar_to_vector
                                  (_.EltVT (X86selects VK1WM:$mask,
                                                       (_.EltVT _.FRC:$src1),
                                                       (_.EltVT ZeroFP))))))),
          (!cast<Instruction>(InstrStr#rrkz)
                        VK1WM:$mask,
                        (_.VT _.RC:$src0),
                        (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
}

// Select a masked scalar store (mrk) for a 512-bit masked_store whose value
// is a 128-bit vector inserted at index 0 of undef and whose mask matches the
// `Mask` dag. The mask GPR ($mask, class MaskRC) is copied to VK1WM.
multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
                                        dag Mask, RegisterClass MaskRC> {

def : Pat<(masked_store
             (_.info512.VT (insert_subvector undef,
                               (_.info128.VT _.info128.RC:$src),
                               (iPTR 0))), addr:$dst, Mask),
          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
                      _.info128.RC:$src)>;

}

// As avx512_store_scalar_lowering, but the mask register is first inserted
// into an i32 IMPLICIT_DEF at `subreg` before being copied to VK1WM.
multiclass avx512_store_scalar_lowering_subreg<string InstrStr,
                                               AVX512VLVectorVTInfo _,
                                               dag Mask, RegisterClass MaskRC,
                                               SubRegIndex subreg> {

def : Pat<(masked_store
             (_.info512.VT (insert_subvector undef,
                               (_.info128.VT _.info128.RC:$src),
                               (iPTR 0))), addr:$dst, Mask),
          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      _.info128.RC:$src)>;

}

// This matches the more recent codegen from clang that avoids emitting a 512
// bit masked store directly. Codegen will widen 128-bit masked store to 512
// bits on AVX512F only targets.
multiclass avx512_store_scalar_lowering_subreg2<string InstrStr,
                                               AVX512VLVectorVTInfo _,
                                               dag Mask512, dag Mask128,
                                               RegisterClass MaskRC,
                                               SubRegIndex subreg> {

// AVX512F pattern.
def : Pat<(masked_store
             (_.info512.VT (insert_subvector undef,
                               (_.info128.VT _.info128.RC:$src),
                               (iPTR 0))), addr:$dst, Mask512),
          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      _.info128.RC:$src)>;

// AVX512VL pattern.
def : Pat<(masked_store (_.info128.VT _.info128.RC:$src), addr:$dst, Mask128),
          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      _.info128.RC:$src)>;
}

// Select a masked scalar load for the low 128 bits extracted from a 512-bit
// masked_load whose mask matches the `Mask` dag:
//   - all-zeros pass-through                 -> rmkz;
//   - pass-through built from X86vzmovl $src -> rmk with $src tied in.
multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
                                       dag Mask, RegisterClass MaskRC> {

def : Pat<(_.info128.VT (extract_subvector
                         (_.info512.VT (masked_load addr:$srcAddr, Mask,
                                        _.info512.ImmAllZerosV)),
                           (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmkz)
                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
                      addr:$srcAddr)>;

def : Pat<(_.info128.VT (extract_subvector
                (_.info512.VT (masked_load addr:$srcAddr, Mask,
                      (_.info512.VT (insert_subvector undef,
                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
                            (iPTR 0))))),
                (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
                      addr:$srcAddr)>;

}

// As avx512_load_scalar_lowering, but the mask register is first inserted
// into an i32 IMPLICIT_DEF at `subreg` before being copied to VK1WM.
multiclass avx512_load_scalar_lowering_subreg<string InstrStr,
                                              AVX512VLVectorVTInfo _,
                                              dag Mask, RegisterClass MaskRC,
                                              SubRegIndex subreg> {

def : Pat<(_.info128.VT (extract_subvector
                         (_.info512.VT (masked_load addr:$srcAddr, Mask,
                                        _.info512.ImmAllZerosV)),
                           (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmkz)
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

def : Pat<(_.info128.VT (extract_subvector
                (_.info512.VT (masked_load addr:$srcAddr, Mask,
                      (_.info512.VT (insert_subvector undef,
                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
                            (iPTR 0))))),
                (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

}

// This matches the more recent codegen from clang that avoids emitting a 512
// bit masked load directly. Codegen will widen 128-bit masked load to 512
// bits on AVX512F only targets.
multiclass avx512_load_scalar_lowering_subreg2<string InstrStr,
                                              AVX512VLVectorVTInfo _,
                                              dag Mask512, dag Mask128,
                                              RegisterClass MaskRC,
                                              SubRegIndex subreg> {
// AVX512F patterns.
def : Pat<(_.info128.VT (extract_subvector
                         (_.info512.VT (masked_load addr:$srcAddr, Mask512,
                                        _.info512.ImmAllZerosV)),
                           (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmkz)
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

def : Pat<(_.info128.VT (extract_subvector
                (_.info512.VT (masked_load addr:$srcAddr, Mask512,
                      (_.info512.VT (insert_subvector undef,
                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
                            (iPTR 0))))),
                (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

// AVX512VL patterns.
def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
                         _.info128.ImmAllZerosV)),
          (!cast<Instruction>(InstrStr#rmkz)
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
                         (_.info128.VT (X86vzmovl _.info128.RC:$src)))),
          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;
}

defm : avx512_move_scalar_lowering<"VMOVSHZ", X86Movsh, fp16imm0, v8f16x_info>;
defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;

// Masked scalar stores. Every mask dag below is `$mask & 1` in some GPR
// width, i.e. only element 0 can be enabled.
defm : avx512_store_scalar_lowering<"VMOVSHZ", avx512vl_f16_info,
                   (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32>;
defm : avx512_store_scalar_lowering_subreg<"VMOVSHZ", avx512vl_f16_info,
                   (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32, sub_32bit>;
defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;

defm : avx512_store_scalar_lowering_subreg2<"VMOVSHZ", avx512vl_f16_info,
                   (v32i1 (insert_subvector
                           (v32i1 immAllZerosV),
                           (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                           (iPTR 0))),
                   (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                   GR8, sub_8bit>;
defm : avx512_store_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (insert_subvector
                           (v16i1 immAllZerosV),
                           (v4i1 (extract_subvector
                                  (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                                  (iPTR 0))),
                           (iPTR 0))),
                   (v4i1 (extract_subvector
                          (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                          (iPTR 0))), GR8, sub_8bit>;
defm : avx512_store_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1
                    (extract_subvector
                     (v16i1
                      (insert_subvector
                       (v16i1 immAllZerosV),
                       (v2i1 (extract_subvector
                              (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                              (iPTR 0))),
                       (iPTR 0))),
                     (iPTR 0))),
                   (v2i1 (extract_subvector
                          (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                          (iPTR 0))), GR8, sub_8bit>;

// Masked scalar loads, using the same mask shapes as the stores above.
defm : avx512_load_scalar_lowering<"VMOVSHZ", avx512vl_f16_info,
                   (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32>;
defm : avx512_load_scalar_lowering_subreg<"VMOVSHZ", avx512vl_f16_info,
                   (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32, sub_32bit>;
defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;

defm : avx512_load_scalar_lowering_subreg2<"VMOVSHZ", avx512vl_f16_info,
                   (v32i1 (insert_subvector
                           (v32i1 immAllZerosV),
                           (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                           (iPTR 0))),
                   (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                   GR8, sub_8bit>;
defm : avx512_load_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (insert_subvector
                           (v16i1 immAllZerosV),
                           (v4i1 (extract_subvector
                                  (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                                  (iPTR 0))),
                           (iPTR 0))),
                   (v4i1 (extract_subvector
                          (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                          (iPTR 0))), GR8, sub_8bit>;
defm : avx512_load_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1
                    (extract_subvector
                     (v16i1
                      (insert_subvector
                       (v16i1 immAllZerosV),
                       (v2i1 (extract_subvector
                              (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                              (iPTR 0))),
                       (iPTR 0))),
                     (iPTR 0))),
                   (v2i1 (extract_subvector
                          (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                          (iPTR 0))), GR8, sub_8bit>;

// Scalar FP selects on a 1-bit mask, implemented with the masked scalar
// moves. The scalars are copied into VR128X; the middle register source is
// an IMPLICIT_DEF in the register forms, and the result is copied back to
// the scalar register class.
def : Pat<(f16 (X86selects VK1WM:$mask, (f16 FR16X:$src1), (f16 FR16X:$src2))),
          (COPY_TO_REGCLASS (v8f16 (VMOVSHZrrk
           (v8f16 (COPY_TO_REGCLASS FR16X:$src2, VR128X)),
           VK1WM:$mask, (v8f16 (IMPLICIT_DEF)),
           (v8f16 (COPY_TO_REGCLASS FR16X:$src1, VR128X)))), FR16X)>;

def : Pat<(f16 (X86selects VK1WM:$mask, (f16 FR16X:$src1), fp16imm0)),
          (COPY_TO_REGCLASS (v8f16 (VMOVSHZrrkz VK1WM:$mask, (v8f16 (IMPLICIT_DEF)),
           (v8f16 (COPY_TO_REGCLASS FR16X:$src1, VR128X)))), FR16X)>;

def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrk
           (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
           VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
           (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;

def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), fp32imm0)),
          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrkz VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
           (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;

def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), (f32 FR32X:$src0))),
          (COPY_TO_REGCLASS
           (v4f32 (VMOVSSZrmk (v4f32 (COPY_TO_REGCLASS FR32X:$src0, VR128X)),
                                                       VK1WM:$mask, addr:$src)),
           FR32X)>;
def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), fp32imm0)),
          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrmkz VK1WM:$mask, addr:$src)), FR32X)>;

def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrk
           (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
           VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
           (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;

def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fp64imm0)),
          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrkz VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
           (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;

def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), (f64 FR64X:$src0))),
          (COPY_TO_REGCLASS
           (v2f64 (VMOVSDZrmk (v2f64 (COPY_TO_REGCLASS FR64X:$src0, VR128X)),
                                                       VK1WM:$mask, addr:$src)),
           FR64X)>;
def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), fp64imm0)),
          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrmkz VK1WM:$mask, addr:$src)), FR64X)>;


// Vector-typed X86selects: $src1 is passed as both register sources of the
// masked move.
def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 VR128X:$src2))),
          (VMOVSSZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 VR128X:$src2))),
          (VMOVSDZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;

def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 immAllZerosV))),
          (VMOVSSZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 immAllZerosV))),
          (VMOVSDZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;

// Register-register scalar moves using the store-direction encoding (opcode
// 0x11, MRMDestReg). Codegen-only, but kept decodable for the disassembler.
let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
  let Predicates = [HasFP16] in {
    def VMOVSHZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
        (ins VR128X:$src1, VR128X:$src2),
        "vmovsh\t{$src2, $src1, $dst|$dst, $src1, $src2}",
        []>, T_MAP5XS, EVEX_4V, VEX_LIG,
        FoldGenData<"VMOVSHZrr">,
        Sched<[SchedWriteFShuffle.XMM]>;

    let Constraints = "$src0 = $dst" in
    def VMOVSHZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
        (ins f16x_info.RC:$src0, f16x_info.KRCWM:$mask,
         VR128X:$src1, VR128X:$src2),
        "vmovsh\t{$src2, $src1, $dst {${mask}}|"#
          "$dst {${mask}}, $src1, $src2}",
        []>, T_MAP5XS, EVEX_K, EVEX_4V, VEX_LIG,
        FoldGenData<"VMOVSHZrrk">,
        Sched<[SchedWriteFShuffle.XMM]>;

    def VMOVSHZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
        (ins f16x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
        "vmovsh\t{$src2, $src1, $dst {${mask}} {z}|"#
          "$dst {${mask}} {z}, $src1, $src2}",
        []>, EVEX_KZ, T_MAP5XS, EVEX_4V, VEX_LIG,
        FoldGenData<"VMOVSHZrrkz">,
        Sched<[SchedWriteFShuffle.XMM]>;
  }
  def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                           (ins VR128X:$src1, VR128X:$src2),
                           "vmovss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                           []>, XS, EVEX_4V, VEX_LIG,
                           FoldGenData<"VMOVSSZrr">,
                           Sched<[SchedWriteFShuffle.XMM]>;

  let Constraints = "$src0 = $dst" in
  def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                             (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask,
                                                   VR128X:$src1, VR128X:$src2),
                             "vmovss\t{$src2, $src1, $dst {${mask}}|"#
                                        "$dst {${mask}}, $src1, $src2}",
                             []>, EVEX_K, XS, EVEX_4V, VEX_LIG,
                             FoldGenData<"VMOVSSZrrk">,
                             Sched<[SchedWriteFShuffle.XMM]>;

  def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                         (ins f32x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
                         "vmovss\t{$src2, $src1, $dst {${mask}} {z}|"#
                                    "$dst {${mask}} {z}, $src1, $src2}",
                         []>, EVEX_KZ, XS, EVEX_4V, VEX_LIG,
                         FoldGenData<"VMOVSSZrrkz">,
                         Sched<[SchedWriteFShuffle.XMM]>;

  def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                           (ins VR128X:$src1, VR128X:$src2),
                           "vmovsd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                           []>, XD, EVEX_4V, VEX_LIG, VEX_W,
                           FoldGenData<"VMOVSDZrr">,
                           Sched<[SchedWriteFShuffle.XMM]>;

  let Constraints = "$src0 = $dst" in
  def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                             (ins f64x_info.RC:$src0,
f64x_info.KRCWM:$mask, 4550 VR128X:$src1, VR128X:$src2), 4551 "vmovsd\t{$src2, $src1, $dst {${mask}}|"# 4552 "$dst {${mask}}, $src1, $src2}", 4553 []>, EVEX_K, XD, EVEX_4V, VEX_LIG, 4554 VEX_W, FoldGenData<"VMOVSDZrrk">, 4555 Sched<[SchedWriteFShuffle.XMM]>; 4556 4557 def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), 4558 (ins f64x_info.KRCWM:$mask, VR128X:$src1, 4559 VR128X:$src2), 4560 "vmovsd\t{$src2, $src1, $dst {${mask}} {z}|"# 4561 "$dst {${mask}} {z}, $src1, $src2}", 4562 []>, EVEX_KZ, XD, EVEX_4V, VEX_LIG, 4563 VEX_W, FoldGenData<"VMOVSDZrrkz">, 4564 Sched<[SchedWriteFShuffle.XMM]>; 4565} 4566 4567def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst|$dst, $src1, $src2}", 4568 (VMOVSHZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>; 4569def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst {${mask}}|"# 4570 "$dst {${mask}}, $src1, $src2}", 4571 (VMOVSHZrrk_REV VR128X:$dst, VK1WM:$mask, 4572 VR128X:$src1, VR128X:$src2), 0>; 4573def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst {${mask}} {z}|"# 4574 "$dst {${mask}} {z}, $src1, $src2}", 4575 (VMOVSHZrrkz_REV VR128X:$dst, VK1WM:$mask, 4576 VR128X:$src1, VR128X:$src2), 0>; 4577def : InstAlias<"vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}", 4578 (VMOVSSZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>; 4579def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}}|"# 4580 "$dst {${mask}}, $src1, $src2}", 4581 (VMOVSSZrrk_REV VR128X:$dst, VK1WM:$mask, 4582 VR128X:$src1, VR128X:$src2), 0>; 4583def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"# 4584 "$dst {${mask}} {z}, $src1, $src2}", 4585 (VMOVSSZrrkz_REV VR128X:$dst, VK1WM:$mask, 4586 VR128X:$src1, VR128X:$src2), 0>; 4587def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}", 4588 (VMOVSDZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>; 4589def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}}|"# 4590 "$dst {${mask}}, $src1, $src2}", 4591 (VMOVSDZrrk_REV VR128X:$dst, VK1WM:$mask, 4592 VR128X:$src1, VR128X:$src2), 0>; 
// Zero-masked "vmovsd.s" spelling; maps onto VMOVSDZrrkz_REV.
def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"#
                             "$dst {${mask}} {z}, $src1, $src2}",
                (VMOVSDZrrkz_REV VR128X:$dst, VK1WM:$mask,
                                 VR128X:$src1, VR128X:$src2), 0>;

// When optimizing for size, X86vzmovl on vectors of 32-bit elements is
// selected to VMOVSSZrr with an AVX512_128_SET0 zero vector as the first
// source. The integer (v4i32/v8i32/v16i32) patterns reuse the same
// instruction as the floating-point ones.
let Predicates = [HasAVX512, OptForSize] in {
  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
            (VMOVSSZrr (v4f32 (AVX512_128_SET0)), VR128X:$src)>;
  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
            (VMOVSSZrr (v4i32 (AVX512_128_SET0)), VR128X:$src)>;

  // Move low f32 and clear high bits.
  // 256-bit inputs: operate on the low xmm (EXTRACT_SUBREG ... sub_xmm) and
  // widen the 128-bit result back with SUBREG_TO_REG.
  def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
              (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))), sub_xmm)>;
  def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
              (v4i32 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)))), sub_xmm)>;

  // 512-bit inputs: same shape as the 256-bit patterns above.
  def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
              (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))), sub_xmm)>;
  def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
              (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)))), sub_xmm)>;
}

// Use 128-bit blends for OptForSpeed since BLENDs have better throughput than
// VMOVSS/SD. Unfortunately, loses the ability to use XMM16-31.
let Predicates = [HasAVX512, OptForSpeed] in {
  // Only the 512-bit X86vzmovl forms are matched here. The low xmm of the
  // source is blended with a V_SET0 zero vector and the 128-bit result is
  // widened with SUBREG_TO_REG. The float pattern uses VBLENDPSrri with
  // immediate 1, the integer pattern VPBLENDWrri with immediate 3.
  // NOTE(review): presumably 3 selects the two low 16-bit words, i.e. the same
  // low 32 bits as the PS blend - confirm.
  def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VBLENDPSrri (v4f32 (V_SET0)),
                          (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)),
                          (i8 1))), sub_xmm)>;
  def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VPBLENDWrri (v4i32 (V_SET0)),
                          (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)),
                          (i8 3))), sub_xmm)>;
}

// Scalar FP loads into a vector register: scalar_to_vector of a load, and the
// zero-extending X86vzload32/64 nodes for the wider FP vector types, all map
// onto VMOVSSZrm / VMOVSDZrm.
let Predicates = [HasAVX512] in {
  def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
            (VMOVSSZrm addr:$src)>;
  def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
            (VMOVSDZrm addr:$src)>;

  // Represent the same patterns above but in the form they appear for
  // 256-bit types
  def : Pat<(v8f32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v4f64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;

  // Represent the same patterns above but in the form they appear for
  // 512-bit types
  def : Pat<(v16f32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v8f64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
}
// Half-precision equivalents: X86vzmovl goes through VMOVSHZrr with an
// AVX512_128_SET0 zero vector, X86vzload16 through VMOVSHZrm. Unlike the
// 32-bit X86vzmovl patterns, these are not split by OptForSize/OptForSpeed.
let Predicates = [HasFP16] in {
  def : Pat<(v8f16 (X86vzmovl (v8f16 VR128X:$src))),
            (VMOVSHZrr (v8f16 (AVX512_128_SET0)), VR128X:$src)>;

  // FIXME we need better canonicalization in dag combine
  def : Pat<(v16f16 (X86vzmovl (v16f16 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v8f16 (VMOVSHZrr (v8f16 (AVX512_128_SET0)),
              (v8f16 (EXTRACT_SUBREG (v16f16 VR256X:$src), sub_xmm)))), sub_xmm)>;
  def : Pat<(v32f16 (X86vzmovl (v32f16 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v8f16 (VMOVSHZrr (v8f16 (AVX512_128_SET0)),
              (v8f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_xmm)))), sub_xmm)>;

  def : Pat<(v8f16 (X86vzload16 addr:$src)),
            (VMOVSHZrm addr:$src)>;

  def : Pat<(v16f16 (X86vzload16 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSHZrm addr:$src), sub_xmm)>;

  def : Pat<(v32f16 (X86vzload16 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSHZrm addr:$src), sub_xmm)>;
}

// EVEX register-to-register "vmovq"; its pattern matches X86vzmovl on v2i64.
let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
                                (ins VR128X:$src),
                                "vmovq\t{$src, $dst|$dst, $src}",
                                [(set VR128X:$dst, (v2i64 (X86vzmovl
                                                   (v2i64 VR128X:$src))))]>,
                                EVEX, VEX_W;
}

// Integer and 64-bit-element zero-extending moves, selected to the EVEX
// movd/movq instructions.
let Predicates = [HasAVX512] in {
  // GPR source: scalar_to_vector followed by X86vzmovl.
  def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
            (VMOVDI2PDIZrr GR32:$src)>;

  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
            (VMOV64toPQIZrr GR64:$src)>;

  // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part.
  def : Pat<(v4i32 (X86vzload32 addr:$src)),
            (VMOVDI2PDIZrm addr:$src)>;
  def : Pat<(v8i32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
  def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
            (VMOVZPQILo2PQIZrr VR128X:$src)>;
  def : Pat<(v2i64 (X86vzload64 addr:$src)),
            (VMOVQI2PQIZrm addr:$src)>;
  def : Pat<(v4i64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;

  // Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
  def : Pat<(v16i32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
  def : Pat<(v8i64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;

  // X86vzmovl on 256/512-bit vectors of 64-bit elements: run the 128-bit
  // vmovq on the low xmm and widen the result with SUBREG_TO_REG.
  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2f64 (VMOVZPQILo2PQIZrr
                     (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))),
             sub_xmm)>;
  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2i64 (VMOVZPQILo2PQIZrr
                     (v2i64 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)))),
             sub_xmm)>;

  def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2f64 (VMOVZPQILo2PQIZrr
                     (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))),
             sub_xmm)>;
  def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2i64 (VMOVZPQILo2PQIZrr
                     (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)))),
             sub_xmm)>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - Non-temporals
//===----------------------------------------------------------------------===//

// Non-temporal loads (vmovntdqa). The instructions carry no pattern of their
// own ([]); the alignednontemporalload patterns further down select them.
def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
                        (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
                        [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.ZMM.RM]>,
                        EVEX, T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>;

let Predicates = [HasVLX] in {
  def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
                       (ins i256mem:$src),
                       "vmovntdqa\t{$src, $dst|$dst, $src}",
                       [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.YMM.RM]>,
                       EVEX, T8PD, EVEX_V256, EVEX_CD8<64, CD8VF>;

  def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
                      (ins i128mem:$src),
                      "vmovntdqa\t{$src, $dst|$dst, $src}",
                      [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.XMM.RM]>,
                      EVEX, T8PD, EVEX_V128, EVEX_CD8<64, CD8VF>;
}

// Defines the "mr" (store to memory) form of one non-temporal store
// instruction for vector type `_`.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic.
//   Sched     - move scheduling class; its MR member is used as SchedRW.
//   st_frag   - store fragment matched by the pattern (defaults to
//               alignednontemporalstore).
multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                        X86SchedWriteMoveLS Sched,
                        PatFrag st_frag = alignednontemporalstore> {
  let SchedRW = [Sched.MR], AddedComplexity = 400 in
  def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(st_frag (_.VT _.RC:$src), addr:$dst)],
                    _.ExeDomain>, EVEX, EVEX_CD8<_.EltSize, CD8VF>;
}

// Instantiates avx512_movnt for the 512-bit type (Z, HasAVX512) and for the
// 256/128-bit types (Z256/Z128, which additionally require HasVLX).
multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
                           AVX512VLVectorVTInfo VTInfo,
                           X86SchedWriteMoveLSWidths Sched> {
  let Predicates = [HasAVX512] in
    defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512, Sched.ZMM>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256, Sched.YMM>, EVEX_V256;
    defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128, Sched.XMM>, EVEX_V128;
  }
}

// The instruction patterns cover only the i64, f64 and f32 element types
// passed here; the remaining integer element types are added by the
// anonymous patterns below.
defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info,
                                SchedWriteVecMoveLSNT>, PD;
defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info,
                                SchedWriteFMoveLSNT>, PD, VEX_W;
defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info,
                                SchedWriteFMoveLSNT>, PS;

// 512-bit: route the other integer store types through VMOVNTDQZmr and all
// aligned non-temporal loads through VMOVNTDQAZrm.
let Predicates = [HasAVX512], AddedComplexity = 400 in {
  def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
  def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
  def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;

  def : Pat<(v8f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v16f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v8i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v16i32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v32i16 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v64i8 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
}

// 256-bit and 128-bit equivalents of the patterns above (require HasVLX).
let Predicates = [HasVLX], AddedComplexity = 400 in {
  def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignednontemporalstore (v16i16 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignednontemporalstore (v32i8 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;

  def : Pat<(v4f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v8f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v4i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v8i32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v16i16 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v32i8 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;

  def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignednontemporalstore (v8i16 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignednontemporalstore (v16i8 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;

  def : Pat<(v2f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v4f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v2i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v4i32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v8i16 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v16i8 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - Integer arithmetic
//
// Register-register (rr) and register-memory (rm) forms of a two-operand
// integer op on vector type `_`, built through AVX512_maskable.
//   opc          - opcode byte.
//   OpcodeStr    - mnemonic.
//   OpNode       - SDNode matched by the pattern.
//   sched        - scheduling class (sched for rr, sched.Folded plus
//                  sched.ReadAfterFold for rm).
//   IsCommutable - forwarded to AVX512_maskable for the rr form only (twice,
//                  as two positional arguments whose meaning is defined by
//                  AVX512_maskable, outside this excerpt).
multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86VectorVTInfo _, X86FoldableSchedWrite sched,
                           bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                    "$src2, $src1", "$src1, $src2",
                    (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                    IsCommutable, IsCommutable>, AVX512BIBase, EVEX_4V,
                    Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2)))>,
                  AVX512BIBase, EVEX_4V,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// avx512_binop_rm plus an rmb form whose second operand is a broadcast scalar
// load (_.BroadcastLdFrag). The asm operand gets the _.BroadcastStr suffix
// ("{1to<NumElts>}") and the instruction is tagged EVEX_B.
multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
                            bit IsCommutable = 0> :
           avx512_binop_rm<opc, OpcodeStr, OpNode, _, sched, IsCommutable> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                  "${src2}"#_.BroadcastStr#", $src1",
                  "$src1, ${src2}"#_.BroadcastStr,
                  (_.VT (OpNode _.RC:$src1,
                                (_.BroadcastLdFrag addr:$src2)))>,
                  AVX512BIBase, EVEX_4V, EVEX_B,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// Instantiates avx512_binop_rm at 512 bits (Z, predicate prd) and at 256/128
// bits (Z256/Z128, predicate prd plus HasVLX).
multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              AVX512VLVectorVTInfo VTInfo,
                              X86SchedWriteWidths sched, Predicate prd,
                              bit IsCommutable = 0> {
  let Predicates = [prd] in
    defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
                             IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256,
                                sched.YMM, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128,
                                sched.XMM, IsCommutable>, EVEX_V128;
  }
}

// Same as avx512_binop_rm_vl, but built on avx512_binop_rmb so every width
// also gets the broadcast (rmb) form.
multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               AVX512VLVectorVTInfo VTInfo,
                               X86SchedWriteWidths sched, Predicate prd,
                               bit IsCommutable = 0> {
  let Predicates = [prd] in
    defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
                             IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256,
                                 sched.YMM, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128,
                                 sched.XMM, IsCommutable>, EVEX_V128;
  }
}

// Per-element-size wrappers. The 64-bit (_q) and 32-bit (_d) versions use the
// broadcast-capable avx512_binop_rmb_vl; the 16-bit (_w) and 8-bit (_b)
// versions use avx512_binop_rm_vl and therefore define no rmb form. Each
// fixes the type-info bundle and the matching EVEX_CD8 element size.
multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
                                  sched, prd, IsCommutable>,
                                  VEX_W, EVEX_CD8<64, CD8VF>;
}

multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
                                  sched, prd, IsCommutable>, EVEX_CD8<32, CD8VF>;
}

multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info,
                                 sched, prd, IsCommutable>, EVEX_CD8<16, CD8VF>,
                                 VEX_WIG;
}

multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info,
                                 sched, prd, IsCommutable>, EVEX_CD8<8, CD8VF>,
                                 VEX_WIG;
}

// Defines the Q and D variants of one op: record names get a "Q"/"D" infix
// and the mnemonic a "q"/"d" suffix, each with its own opcode.
multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd, bit IsCommutable = 0> {
  defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, sched, prd,
                                   IsCommutable>;

  defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, sched, prd,
                                   IsCommutable>;
}

// Defines the W and B variants of one op, analogous to avx512_binop_rm_vl_dq.
multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd, bit IsCommutable = 0> {
  defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, sched, prd,
                                   IsCommutable>;

  defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode, sched, prd,
                                   IsCommutable>;
}

// All four element sizes of one op. The D/Q variants are predicated on
// HasAVX512, the B/W variants on HasBWI.
multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
                                  bits<8> opc_d, bits<8> opc_q,
                                  string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched,
                                  bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
                                    sched, HasAVX512, IsCommutable>,
              avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
                                    sched, HasBWI, IsCommutable>;
}

// rr/rm/rmb forms of a binary op whose source type (_Src), result type (_Dst)
// and broadcast element type (_Brdct) may differ. The broadcast load is
// produced as _Brdct.VT and bitconverted before being handed to OpNode.
// Unlike avx512_binop_rm, IsCommutable is passed to AVX512_maskable only once
// for the rr form.
multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched,
                            SDNode OpNode,X86VectorVTInfo _Src,
                            X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct,
                            bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
                            "$src2, $src1","$src1, $src2",
                            (_Dst.VT (OpNode
                                         (_Src.VT _Src.RC:$src1),
                                         (_Src.VT _Src.RC:$src2))),
                            IsCommutable>,
                            AVX512BIBase, EVEX_4V, Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                        (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
                        "$src2, $src1", "$src1, $src2",
                        (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
                                      (_Src.LdFrag addr:$src2)))>,
                        AVX512BIBase, EVEX_4V,
                        Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                    (ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
                    OpcodeStr,
                    "${src2}"#_Brdct.BroadcastStr#", $src1",
                     "$src1, ${src2}"#_Brdct.BroadcastStr,
                    (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
                                 (_Brdct.VT (_Brdct.BroadcastLdFrag addr:$src2)))))>,
                    AVX512BIBase, EVEX_4V, EVEX_B,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// Integer add/sub/multiply/average instantiations. The final argument of each
// is IsCommutable: 1 for the add, multiply and average families, 0 for the
// subtract families.
defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
                                    SchedWriteVecALU, 1>;
defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
                                    SchedWriteVecALU, 0>;
defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", saddsat,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", ssubsat,
                                    SchedWriteVecALU, HasBWI, 0>;
defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", uaddsat,
                                     SchedWriteVecALU, HasBWI, 1>;
defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", usubsat,
                                     SchedWriteVecALU, HasBWI, 0>;
defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
                                    SchedWritePMULLD, HasAVX512, 1>, T8PD;
defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
                                    SchedWriteVecIMul, HasBWI, 1>;
defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
                                    SchedWriteVecIMul, HasDQI, 1>, T8PD,
                                    NotEVEX2VEXConvertible;
defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SchedWriteVecIMul,
                                    HasBWI, 1>;
defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SchedWriteVecIMul,
                                     HasBWI, 1>;
defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs,
                                      SchedWriteVecIMul, HasBWI, 1>, T8PD;
defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", X86avg,
                                   SchedWriteVecALU, HasBWI, 1>;
defm VPMULDQ : avx512_binop_rm_vl_q<0x28, "vpmuldq", X86pmuldq,
                                    SchedWriteVecIMul, HasAVX512, 1>, T8PD;
defm VPMULUDQ : avx512_binop_rm_vl_q<0xF4, "vpmuludq", X86pmuludq,
                                     SchedWriteVecIMul, HasAVX512, 1>;

// Instantiates avx512_binop_rm2 at all three widths with separate source and
// destination type bundles. The broadcast type is fixed to 64-bit elements
// (v8i64_info / v4i64x_info / v2i64x_info), with EVEX_CD8<64, CD8VF> and
// VEX_W, regardless of _SrcVTInfo.
multiclass avx512_binop_all<bits<8> opc, string OpcodeStr,
                            X86SchedWriteWidths sched,
                            AVX512VLVectorVTInfo _SrcVTInfo,
                            AVX512VLVectorVTInfo _DstVTInfo,
                            SDNode OpNode, Predicate prd,  bit IsCommutable = 0> {
  let Predicates = [prd] in
    defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
                                   _SrcVTInfo.info512, _DstVTInfo.info512,
                                   v8i64_info, IsCommutable>,
                                    EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
  let Predicates = [HasVLX, prd] in {
    defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
                                      _SrcVTInfo.info256, _DstVTInfo.info256,
                                      v4i64x_info, IsCommutable>,
                                      EVEX_V256, EVEX_CD8<64, CD8VF>, VEX_W;
    defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
                                      _SrcVTInfo.info128, _DstVTInfo.info128,
                                      v2i64x_info, IsCommutable>,
                                     EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_W;
  }
}

defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SchedWriteVecALU,
                                avx512vl_i8_info, avx512vl_i8_info,
                                X86multishift, HasVBMI, 0>, T8PD;

// Broadcast (rmb) form for the pack instructions: the second operand is a
// broadcast scalar load of _Src's element type, bitconverted to _Src.VT.
multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86VectorVTInfo _Src, X86VectorVTInfo _Dst,
                            X86FoldableSchedWrite sched> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                    (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
                    OpcodeStr,
                    "${src2}"#_Src.BroadcastStr#", $src1",
                     "$src1, ${src2}"#_Src.BroadcastStr,
                    (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
                                 (_Src.VT (_Src.BroadcastLdFrag addr:$src2)))))>,
                    EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// rr and rm forms of an op that reads two _Src vectors and produces a _Dst
// vector. Used for the pack instructions and for vpmadd*.
multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
                            SDNode OpNode,X86VectorVTInfo _Src,
                            X86VectorVTInfo _Dst, X86FoldableSchedWrite sched,
                            bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
                            "$src2, $src1","$src1, $src2",
                            (_Dst.VT (OpNode
                                         (_Src.VT _Src.RC:$src1),
                                         (_Src.VT _Src.RC:$src2))),
                            IsCommutable, IsCommutable>,
                            EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V, Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                        (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
                        "$src2, $src1", "$src1, $src2",
                        (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
                                      (_Src.LdFrag addr:$src2)))>,
                         EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>,
                         Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// i32 -> i16 packs at all three widths: rr/rm from avx512_packs_rm plus the
// rmb form from avx512_packs_rmb.
multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode> {
  let Predicates = [HasBWI] in
  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i32_info,
                                 v32i16_info, SchedWriteShuffle.ZMM>,
                avx512_packs_rmb<opc, OpcodeStr, OpNode, v16i32_info,
                                 v32i16_info, SchedWriteShuffle.ZMM>, EVEX_V512;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i32x_info,
                                     v16i16x_info, SchedWriteShuffle.YMM>,
                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v8i32x_info,
                                      v16i16x_info, SchedWriteShuffle.YMM>,
                                      EVEX_V256;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v4i32x_info,
                                     v8i16x_info, SchedWriteShuffle.XMM>,
                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v4i32x_info,
                                      v8i16x_info, SchedWriteShuffle.XMM>,
                                      EVEX_V128;
  }
}
// i16 -> i8 packs at all three widths. Only avx512_packs_rm is instantiated,
// so no rmb (broadcast) form is defined for these.
multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
                            SDNode OpNode> {
  let Predicates = [HasBWI] in
  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info, v64i8_info,
                                SchedWriteShuffle.ZMM>, EVEX_V512, VEX_WIG;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info,
                                     v32i8x_info, SchedWriteShuffle.YMM>,
                                     EVEX_V256, VEX_WIG;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info,
                                     v16i8x_info, SchedWriteShuffle.XMM>,
                                     EVEX_V128, VEX_WIG;
  }
}

// vpmadd* at all three widths. Reuses avx512_packs_rm (rr/rm only) with the
// integer-multiply scheduling classes and caller-supplied type bundles.
multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr,
                            SDNode OpNode, AVX512VLVectorVTInfo _Src,
                            AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> {
  let Predicates = [HasBWI] in
  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512,
                                _Dst.info512, SchedWriteVecIMul.ZMM,
                                IsCommutable>, EVEX_V512;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256,
                                     _Dst.info256, SchedWriteVecIMul.YMM,
                                     IsCommutable>, EVEX_V256;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128,
                                     _Dst.info128, SchedWriteVecIMul.XMM,
                                     IsCommutable>, EVEX_V128;
  }
}

// Pack instantiations. VPACKUSDW is the only one that uses AVX5128IBase
// rather than AVX512BIBase.
defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, AVX512BIBase;
defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, AVX5128IBase;
defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase;
defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase;

// VPMADDUBSW keeps the default IsCommutable = 0; VPMADDWD passes 1.
defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
                     avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8PD, VEX_WIG;
defm VPMADDWD   : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
                     avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase, VEX_WIG;

// Signed/unsigned max and min, one instantiation per element size. All are
// commutable; the B/W forms require HasBWI, the D/Q forms HasAVX512, and the
// Q forms are additionally tagged NotEVEX2VEXConvertible.
defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax,
                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMAXSD : avx512_binop_rm_vl_d<0x3D, "vpmaxsd", smax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
defm VPMAXSQ : avx512_binop_rm_vl_q<0x3D, "vpmaxsq", smax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
                                    NotEVEX2VEXConvertible;

defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax,
                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMAXUD : avx512_binop_rm_vl_d<0x3F, "vpmaxud", umax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
defm VPMAXUQ : avx512_binop_rm_vl_q<0x3F, "vpmaxuq", umax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
                                    NotEVEX2VEXConvertible;

defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin,
                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMINSD : avx512_binop_rm_vl_d<0x39, "vpminsd", smin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
defm VPMINSQ : avx512_binop_rm_vl_q<0x39, "vpminsq", smin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
                                    NotEVEX2VEXConvertible;

defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin,
                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMINUD : avx512_binop_rm_vl_d<0x3B, "vpminud", umin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
defm VPMINUQ : avx512_binop_rm_vl_q<0x3B, "vpminuq", umin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
                                    NotEVEX2VEXConvertible;

// PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX.
// 64-bit element multiply for 128/256-bit vectors when VLX is not available.
// Each operand is placed in the low part of an otherwise undefined (IMPLICIT_DEF)
// v8i64 register, the 512-bit instruction is used, and only the low
// sub_ymm/sub_xmm part of the result is extracted.
let Predicates = [HasDQI, NoVLX] in {
  def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG
                (VPMULLQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
             sub_ymm)>;
  def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
            (EXTRACT_SUBREG
                (VPMULLQZrmb
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    addr:$src2),
             sub_ymm)>;

  def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG
                (VPMULLQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
             sub_xmm)>;
  def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
            (EXTRACT_SUBREG
                (VPMULLQZrmb
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    addr:$src2),
             sub_xmm)>;
}

// Same widening scheme as above, parameterized for the 64-bit min/max nodes.
//   Instr  - name prefix of the 512-bit instruction; "rr" / "rmb" is appended
//            to pick the register-register or register-broadcast form.
//   OpNode - the DAG node being matched on v4i64 / v2i64.
multiclass avx512_min_max_lowering<string Instr, SDNode OpNode> {
  def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)),
            (EXTRACT_SUBREG
                (!cast<Instruction>(Instr#"rr")
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
             sub_ymm)>;
  def : Pat<(v4i64 (OpNode (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
            (EXTRACT_SUBREG
                (!cast<Instruction>(Instr#"rmb")
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    addr:$src2),
             sub_ymm)>;

  def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)),
            (EXTRACT_SUBREG
                (!cast<Instruction>(Instr#"rr")
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
             sub_xmm)>;
  def : Pat<(v2i64 (OpNode (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
            (EXTRACT_SUBREG
                (!cast<Instruction>(Instr#"rmb")
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    addr:$src2),
             sub_xmm)>;
}

// Unsigned/signed 64-bit max/min on 128/256-bit vectors without VLX.
let Predicates = [HasAVX512, NoVLX] in {
  defm : avx512_min_max_lowering<"VPMAXUQZ", umax>;
  defm : avx512_min_max_lowering<"VPMINUQZ", umin>;
  defm : avx512_min_max_lowering<"VPMAXSQZ", smax>;
  defm : avx512_min_max_lowering<"VPMINSQZ", smin>;
}

//===----------------------------------------------------------------------===//
// AVX-512 Logical Instructions
//===----------------------------------------------------------------------===//

// NOTE(review): the trailing 1 is presumably the commutable flag (it is left
// out for VPANDN) - confirm against avx512_binop_rm_vl_dq.
defm VPAND : avx512_binop_rm_vl_dq<0xDB, 0xDB, "vpand", and,
                                   SchedWriteVecLogic, HasAVX512, 1>;
defm VPOR : avx512_binop_rm_vl_dq<0xEB, 0xEB, "vpor", or,
                                  SchedWriteVecLogic, HasAVX512, 1>;
defm VPXOR : avx512_binop_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
                                   SchedWriteVecLogic, HasAVX512, 1>;
defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
                                    SchedWriteVecLogic, HasAVX512>;

// Unmasked logic ops on byte/word element vectors (128/256-bit) are selected
// to the Q-suffixed instructions.
let Predicates = [HasVLX] in {
  // 128-bit, register-register.
  def : Pat<(v16i8 (and VR128X:$src1, VR128X:$src2)),
            (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (and VR128X:$src1, VR128X:$src2)),
            (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(v16i8 (or VR128X:$src1, VR128X:$src2)),
            (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (or VR128X:$src1, VR128X:$src2)),
            (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(v16i8 (xor VR128X:$src1, VR128X:$src2)),
            (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (xor VR128X:$src1, VR128X:$src2)),
            (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(v16i8 (X86andnp VR128X:$src1, VR128X:$src2)),
            (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (X86andnp VR128X:$src1, VR128X:$src2)),
            (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;

  // 128-bit, register-memory.
  def : Pat<(and VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(and VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPANDQZ128rm VR128X:$src1, addr:$src2)>;

  def : Pat<(or VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPORQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(or VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPORQZ128rm VR128X:$src1, addr:$src2)>;

  def : Pat<(xor VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(xor VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPXORQZ128rm VR128X:$src1, addr:$src2)>;

  def : Pat<(X86andnp VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;

  // 256-bit, register-register.
  def : Pat<(v32i8 (and VR256X:$src1, VR256X:$src2)),
            (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (and VR256X:$src1, VR256X:$src2)),
            (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;

  def : Pat<(v32i8 (or VR256X:$src1, VR256X:$src2)),
            (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (or VR256X:$src1, VR256X:$src2)),
            (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;

  def : Pat<(v32i8 (xor VR256X:$src1, VR256X:$src2)),
            (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (xor VR256X:$src1, VR256X:$src2)),
            (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;

  def : Pat<(v32i8 (X86andnp VR256X:$src1, VR256X:$src2)),
            (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (X86andnp VR256X:$src1, VR256X:$src2)),
            (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;

  // 256-bit, register-memory.
  def : Pat<(and VR256X:$src1, (loadv32i8 addr:$src2)),
            (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(and VR256X:$src1, (loadv16i16 addr:$src2)),
            (VPANDQZ256rm VR256X:$src1, addr:$src2)>;

  def : Pat<(or VR256X:$src1, (loadv32i8 addr:$src2)),
            (VPORQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(or VR256X:$src1, (loadv16i16 addr:$src2)),
            (VPORQZ256rm VR256X:$src1, addr:$src2)>;

  def : Pat<(xor VR256X:$src1, (loadv32i8 addr:$src2)),
            (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(xor VR256X:$src1, (loadv16i16 addr:$src2)),
            (VPXORQZ256rm VR256X:$src1, addr:$src2)>;

  def : Pat<(X86andnp VR256X:$src1, (loadv32i8 addr:$src2)),
            (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR256X:$src1, (loadv16i16 addr:$src2)),
            (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
}

// 512-bit byte/word element logic ops, likewise selected to the Q forms.
let Predicates = [HasAVX512] in {
  // Register-register.
  def : Pat<(v64i8 (and VR512:$src1, VR512:$src2)),
            (VPANDQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v32i16 (and VR512:$src1, VR512:$src2)),
            (VPANDQZrr VR512:$src1, VR512:$src2)>;

  def : Pat<(v64i8 (or VR512:$src1, VR512:$src2)),
            (VPORQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v32i16 (or VR512:$src1, VR512:$src2)),
            (VPORQZrr VR512:$src1, VR512:$src2)>;

  def : Pat<(v64i8 (xor VR512:$src1, VR512:$src2)),
            (VPXORQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v32i16 (xor VR512:$src1, VR512:$src2)),
            (VPXORQZrr VR512:$src1, VR512:$src2)>;

  def : Pat<(v64i8 (X86andnp VR512:$src1, VR512:$src2)),
            (VPANDNQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v32i16 (X86andnp VR512:$src1, VR512:$src2)),
            (VPANDNQZrr VR512:$src1, VR512:$src2)>;

  // Register-memory.
  def : Pat<(and VR512:$src1, (loadv64i8 addr:$src2)),
            (VPANDQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(and VR512:$src1, (loadv32i16 addr:$src2)),
            (VPANDQZrm VR512:$src1, addr:$src2)>;

  def : Pat<(or VR512:$src1, (loadv64i8 addr:$src2)),
            (VPORQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(or VR512:$src1, (loadv32i16 addr:$src2)),
            (VPORQZrm VR512:$src1, addr:$src2)>;

  def : Pat<(xor VR512:$src1, (loadv64i8 addr:$src2)),
            (VPXORQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(xor VR512:$src1, (loadv32i16 addr:$src2)),
            (VPXORQZrm VR512:$src1, addr:$src2)>;

  def : Pat<(X86andnp VR512:$src1, (loadv64i8 addr:$src2)),
            (VPANDNQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR512:$src1, (loadv32i16 addr:$src2)),
            (VPANDNQZrm VR512:$src1, addr:$src2)>;
}

// Patterns to catch vselect with different type than logic op.
//   InstrStr - instruction name prefix; the form suffix (rrk, rrkz, rmk, rmkz)
//              is appended to select the masked instruction.
//   OpNode   - the logic DAG node.
//   _        - type info of the vselect (result VT, register and mask classes).
//   IntInfo  - type info of the logic op, which is bitconverted to _.VT.
multiclass avx512_logical_lowering<string InstrStr, SDNode OpNode,
                                    X86VectorVTInfo _,
                                    X86VectorVTInfo IntInfo> {
  // Masked register-register logical operations.
  // Unselected elements come from $src0 (k form) ...
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
             _.RC:$src1, _.RC:$src2)>;

  // ... or are all zeros (kz form).
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
             _.RC:$src2)>;

  // Masked register-memory logical operations.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
                                            (load addr:$src2)))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
             _.RC:$src1, addr:$src2)>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
                                            (load addr:$src2)))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1,
             addr:$src2)>;
}

// Broadcast-load counterpart of avx512_logical_lowering: the second operand
// is IntInfo.BroadcastLdFrag and the rmbk / rmbkz forms are selected.
multiclass avx512_logical_lowering_bcast<string InstrStr, SDNode OpNode,
                                          X86VectorVTInfo _,
                                          X86VectorVTInfo IntInfo> {
  // Register-broadcast logical operations.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert
                    (IntInfo.VT (OpNode _.RC:$src1,
                                 (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
             _.RC:$src1, addr:$src2)>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert
                    (IntInfo.VT (OpNode _.RC:$src1,
                                 (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rmbkz) _.KRCWM:$mask,
             _.RC:$src1, addr:$src2)>;
}

// Instantiates avx512_logical_lowering for the three vector widths:
// Z128/Z256 require VLX, Z requires AVX512.
multiclass avx512_logical_lowering_sizes<string InstrStr, SDNode OpNode,
                                          AVX512VLVectorVTInfo SelectInfo,
                                          AVX512VLVectorVTInfo IntInfo> {
let Predicates = [HasVLX] in {
  defm : avx512_logical_lowering<InstrStr#"Z128", OpNode, SelectInfo.info128,
                                 IntInfo.info128>;
  defm : avx512_logical_lowering<InstrStr#"Z256", OpNode, SelectInfo.info256,
                                 IntInfo.info256>;
}
let Predicates = [HasAVX512] in {
  defm : avx512_logical_lowering<InstrStr#"Z", OpNode, SelectInfo.info512,
                                 IntInfo.info512>;
}
}

// Instantiates avx512_logical_lowering_bcast for the three vector widths,
// with the same predicates as avx512_logical_lowering_sizes.
multiclass avx512_logical_lowering_sizes_bcast<string InstrStr, SDNode OpNode,
                                                AVX512VLVectorVTInfo SelectInfo,
                                                AVX512VLVectorVTInfo IntInfo> {
let Predicates = [HasVLX] in {
  defm : avx512_logical_lowering_bcast<InstrStr#"Z128", OpNode,
                                       SelectInfo.info128, IntInfo.info128>;
  defm : avx512_logical_lowering_bcast<InstrStr#"Z256", OpNode,
                                       SelectInfo.info256, IntInfo.info256>;
}
let Predicates = [HasAVX512] in {
  defm : avx512_logical_lowering_bcast<InstrStr#"Z", OpNode,
                                       SelectInfo.info512, IntInfo.info512>;
}
}

// All (vselect type, logic-op type) combinations handled for one logic op.
// The element size of the vselect type picks the "D" or "Q" instruction.
multiclass avx512_logical_lowering_types<string InstrStr, SDNode OpNode> {
  // i64 vselect with i32/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
                                       avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
                                       avx512vl_i8_info>;

  // i32 vselect with i64/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
                                       avx512vl_i64_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
                                       avx512vl_i8_info>;

  // f32 vselect with i64/i32/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i64_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i8_info>;

  // f64 vselect with i64/i32/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i64_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i8_info>;

  // Broadcast forms: f32 vselect with i32 logic op, f64 vselect with i64
  // logic op.
  defm : avx512_logical_lowering_sizes_bcast<InstrStr#"D", OpNode,
                                             avx512vl_f32_info,
                                             avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes_bcast<InstrStr#"Q", OpNode,
                                             avx512vl_f64_info,
                                             avx512vl_i64_info>;
}

defm : avx512_logical_lowering_types<"VPAND", and>;
defm : avx512_logical_lowering_types<"VPOR", or>;
defm : avx512_logical_lowering_types<"VPXOR", xor>;
defm : avx512_logical_lowering_types<"VPANDN", X86andnp>;

//===----------------------------------------------------------------------===//
// AVX-512 FP arithmetic
//===----------------------------------------------------------------------===//

// Scalar FP binary operation.
//   rr_Int / rm_Int - maskable forms operating on the vector register class
//                     (_.RC), matched through VecNode.
//   rr / rm         - isCodeGenOnly forms operating on the scalar FP register
//                     class (_.FRC), matched through OpNode; only rr takes
//                     IsCommutable.
// Every form reads MXCSR and is marked mayRaiseFPException.
multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                            SDPatternOperator OpNode, SDNode VecNode,
                            X86FoldableSchedWrite sched, bit IsCommutable> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
                           Sched<[sched]>;

  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (_.VT (VecNode _.RC:$src1,
                                        (_.ScalarIntMemFrags addr:$src2)))>,
                         Sched<[sched.Folded, sched.ReadAfterFold]>;
  let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.FRC:$src2),
                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                          [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
                          Sched<[sched]> {
    let isCommutable = IsCommutable;
  }
  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
                         OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                         [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                         (_.ScalarLdFrag addr:$src2)))]>,
                         Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
  }
}

// Scalar FP binary operation with an explicit rounding-control operand
// (AVX512RC:$rc, passed to VecNode as an i32 timm). Encoded with EVEX_B and
// EVEX_RC.
multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                                  SDNode VecNode, X86FoldableSchedWrite sched> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                          (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
                          "$rc, $src2, $src1", "$src1, $src2, $rc",
                          (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                          (i32 timm:$rc))>,
                          EVEX_B, EVEX_RC, Sched<[sched]>;
}

// Scalar FP binary operation with an additional {sae} form.
//   rr_Int / rm_Int - maskable forms on _.RC matched through VecNode, tagged
//                     SIMD_EXC.
//   rr / rm         - isCodeGenOnly forms on _.FRC matched through OpNode,
//                     each with an EVEX2VEXOverride built from EVEX2VexOvrd.
//   rrb_Int         - {sae} register form matched through SaeNode (EVEX_B).
multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                                SDNode OpNode, SDNode VecNode, SDNode SaeNode,
                                X86FoldableSchedWrite sched, bit IsCommutable,
                                string EVEX2VexOvrd> {
  let ExeDomain = _.ExeDomain in {
  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
                           Sched<[sched]>, SIMD_EXC;

  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (_.VT (VecNode _.RC:$src1,
                                        (_.ScalarIntMemFrags addr:$src2)))>,
                         Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;

  let isCodeGenOnly = 1, Predicates = [HasAVX512],
      Uses = [MXCSR], mayRaiseFPException = 1 in {
  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.FRC:$src2),
                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                          [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
                          Sched<[sched]>,
                          EVEX2VEXOverride<EVEX2VexOvrd#"rr"> {
    let isCommutable = IsCommutable;
  }
  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
                         OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                         [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                         (_.ScalarLdFrag addr:$src2)))]>,
                         Sched<[sched.Folded, sched.ReadAfterFold]>,
                         EVEX2VEXOverride<EVEX2VexOvrd#"rm">;
  }

  let Uses = [MXCSR] in
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                            "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                            (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
                            EVEX_B, Sched<[sched]>;
  }
}

// Instantiates the plain and rounding-control scalar forms for f32 (SSZ),
// f64 (SDZ) and, when FP16 is available, f16 (SHZ).
multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                                SDNode VecNode, SDNode RndNode,
                                X86SchedWriteSizes sched, bit IsCommutable> {
  defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
                              sched.PS.Scl, IsCommutable>,
             avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, RndNode,
                              sched.PS.Scl>,
                              XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
  defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
                              sched.PD.Scl, IsCommutable>,
             avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, RndNode,
                              sched.PD.Scl>,
                              XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
  let Predicates = [HasFP16] in
    defm SHZ : avx512_fp_scalar<opc, OpcodeStr#"sh", f16x_info, OpNode,
                                VecNode, sched.PH.Scl, IsCommutable>,
               avx512_fp_scalar_round<opc, OpcodeStr#"sh", f16x_info, RndNode,
                                sched.PH.Scl>,
                                T_MAP5XS, EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>;
}

// Instantiates the {sae}-capable scalar forms for f32 (SSZ), f64 (SDZ) and,
// when FP16 is available, f16 (SHZ). The f16 variant is marked
// NotEVEX2VEXConvertible.
multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDNode VecNode, SDNode SaeNode,
                              X86SchedWriteSizes sched, bit IsCommutable> {
  defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode,
                              VecNode, SaeNode, sched.PS.Scl, IsCommutable,
                              NAME#"SS">,
                              XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
  defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode,
                              VecNode, SaeNode, sched.PD.Scl, IsCommutable,
                              NAME#"SD">,
                              XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
  let Predicates = [HasFP16] in {
    defm SHZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sh", f16x_info, OpNode,
                                VecNode, SaeNode, sched.PH.Scl, IsCommutable,
                                NAME#"SH">,
                                T_MAP5XS, EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>,
                                NotEVEX2VEXConvertible;
  }
}
defm VADD : avx512_binop_s_round<0x58, "vadd", any_fadd, X86fadds, X86faddRnds,
                                 SchedWriteFAddSizes, 1>;
5681defm VMUL : avx512_binop_s_round<0x59, "vmul", any_fmul, X86fmuls, X86fmulRnds, 5682 SchedWriteFMulSizes, 1>; 5683defm VSUB : avx512_binop_s_round<0x5C, "vsub", any_fsub, X86fsubs, X86fsubRnds, 5684 SchedWriteFAddSizes, 0>; 5685defm VDIV : avx512_binop_s_round<0x5E, "vdiv", any_fdiv, X86fdivs, X86fdivRnds, 5686 SchedWriteFDivSizes, 0>; 5687defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminSAEs, 5688 SchedWriteFCmpSizes, 0>; 5689defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxSAEs, 5690 SchedWriteFCmpSizes, 0>; 5691 5692// MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use 5693// X86fminc and X86fmaxc instead of X86fmin and X86fmax 5694multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr, 5695 X86VectorVTInfo _, SDNode OpNode, 5696 X86FoldableSchedWrite sched, 5697 string EVEX2VEXOvrd> { 5698 let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in { 5699 def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst), 5700 (ins _.FRC:$src1, _.FRC:$src2), 5701 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", 5702 [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>, 5703 Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr"> { 5704 let isCommutable = 1; 5705 } 5706 def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst), 5707 (ins _.FRC:$src1, _.ScalarMemOp:$src2), 5708 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", 5709 [(set _.FRC:$dst, (OpNode _.FRC:$src1, 5710 (_.ScalarLdFrag addr:$src2)))]>, 5711 Sched<[sched.Folded, sched.ReadAfterFold]>, 5712 EVEX2VEXOverride<EVEX2VEXOvrd#"rm">; 5713 } 5714} 5715defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc, 5716 SchedWriteFCmp.Scl, "VMINCSS">, XS, 5717 EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC; 5718 5719defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc, 5720 SchedWriteFCmp.Scl, "VMINCSD">, XD, 5721 VEX_W, EVEX_4V, VEX_LIG, 5722 EVEX_CD8<64, CD8VT1>, SIMD_EXC; 5723 5724defm 
VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc, 5725 SchedWriteFCmp.Scl, "VMAXCSS">, XS, 5726 EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC; 5727 5728defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc, 5729 SchedWriteFCmp.Scl, "VMAXCSD">, XD, 5730 VEX_W, EVEX_4V, VEX_LIG, 5731 EVEX_CD8<64, CD8VT1>, SIMD_EXC; 5732 5733defm VMINCSHZ : avx512_comutable_binop_s<0x5D, "vminsh", f16x_info, X86fminc, 5734 SchedWriteFCmp.Scl, "VMINCSH">, T_MAP5XS, 5735 EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC, 5736 NotEVEX2VEXConvertible; 5737defm VMAXCSHZ : avx512_comutable_binop_s<0x5F, "vmaxsh", f16x_info, X86fmaxc, 5738 SchedWriteFCmp.Scl, "VMAXCSH">, T_MAP5XS, 5739 EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC, 5740 NotEVEX2VEXConvertible; 5741 5742multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 5743 SDPatternOperator MaskOpNode, 5744 X86VectorVTInfo _, X86FoldableSchedWrite sched, 5745 bit IsCommutable, 5746 bit IsKCommutable = IsCommutable, 5747 string suffix = _.Suffix, 5748 string ClobberConstraint = "", 5749 bit MayRaiseFPException = 1> { 5750 let ExeDomain = _.ExeDomain, hasSideEffects = 0, 5751 Uses = [MXCSR], mayRaiseFPException = MayRaiseFPException in { 5752 defm rr: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst), 5753 (ins _.RC:$src1, _.RC:$src2), OpcodeStr#suffix, 5754 "$src2, $src1", "$src1, $src2", 5755 (_.VT (OpNode _.RC:$src1, _.RC:$src2)), 5756 (_.VT (MaskOpNode _.RC:$src1, _.RC:$src2)), ClobberConstraint, 5757 IsCommutable, IsKCommutable, IsKCommutable>, EVEX_4V, Sched<[sched]>; 5758 let mayLoad = 1 in { 5759 defm rm: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst), 5760 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#suffix, 5761 "$src2, $src1", "$src1, $src2", 5762 (OpNode _.RC:$src1, (_.LdFrag addr:$src2)), 5763 (MaskOpNode _.RC:$src1, (_.LdFrag addr:$src2)), 5764 ClobberConstraint>, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>; 5765 defm 
rmb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst), 5766 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#suffix, 5767 "${src2}"#_.BroadcastStr#", $src1", 5768 "$src1, ${src2}"#_.BroadcastStr, 5769 (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))), 5770 (MaskOpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))), 5771 ClobberConstraint>, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 5772 } 5773 } 5774} 5775 5776multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr, 5777 SDPatternOperator OpNodeRnd, 5778 X86FoldableSchedWrite sched, X86VectorVTInfo _, 5779 string suffix = _.Suffix, 5780 string ClobberConstraint = ""> { 5781 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in 5782 defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 5783 (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr#suffix, 5784 "$rc, $src2, $src1", "$src1, $src2, $rc", 5785 (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 timm:$rc))), 5786 0, 0, 0, vselect_mask, ClobberConstraint>, 5787 EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>; 5788} 5789 5790multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr, 5791 SDPatternOperator OpNodeSAE, 5792 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 5793 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in 5794 defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 5795 (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix, 5796 "{sae}, $src2, $src1", "$src1, $src2, {sae}", 5797 (_.VT (OpNodeSAE _.RC:$src1, _.RC:$src2))>, 5798 EVEX_4V, EVEX_B, Sched<[sched]>; 5799} 5800 5801multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 5802 SDPatternOperator MaskOpNode, 5803 Predicate prd, X86SchedWriteSizes sched, 5804 bit IsCommutable = 0, 5805 bit IsPD128Commutable = IsCommutable> { 5806 let Predicates = [prd] in { 5807 defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f32_info, 5808 sched.PS.ZMM, IsCommutable>, EVEX_V512, PS, 5809 EVEX_CD8<32, CD8VF>; 5810 
defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f64_info, 5811 sched.PD.ZMM, IsCommutable>, EVEX_V512, PD, VEX_W, 5812 EVEX_CD8<64, CD8VF>; 5813 } 5814 5815 // Define only if AVX512VL feature is present. 5816 let Predicates = [prd, HasVLX] in { 5817 defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f32x_info, 5818 sched.PS.XMM, IsCommutable>, EVEX_V128, PS, 5819 EVEX_CD8<32, CD8VF>; 5820 defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f32x_info, 5821 sched.PS.YMM, IsCommutable>, EVEX_V256, PS, 5822 EVEX_CD8<32, CD8VF>; 5823 defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v2f64x_info, 5824 sched.PD.XMM, IsPD128Commutable, 5825 IsCommutable>, EVEX_V128, PD, VEX_W, 5826 EVEX_CD8<64, CD8VF>; 5827 defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f64x_info, 5828 sched.PD.YMM, IsCommutable>, EVEX_V256, PD, VEX_W, 5829 EVEX_CD8<64, CD8VF>; 5830 } 5831} 5832 5833multiclass avx512_fp_binop_ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 5834 SDPatternOperator MaskOpNode, 5835 X86SchedWriteSizes sched, bit IsCommutable = 0> { 5836 let Predicates = [HasFP16] in { 5837 defm PHZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v32f16_info, 5838 sched.PH.ZMM, IsCommutable>, EVEX_V512, T_MAP5PS, 5839 EVEX_CD8<16, CD8VF>; 5840 } 5841 let Predicates = [HasVLX, HasFP16] in { 5842 defm PHZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f16x_info, 5843 sched.PH.XMM, IsCommutable>, EVEX_V128, T_MAP5PS, 5844 EVEX_CD8<16, CD8VF>; 5845 defm PHZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f16x_info, 5846 sched.PH.YMM, IsCommutable>, EVEX_V256, T_MAP5PS, 5847 EVEX_CD8<16, CD8VF>; 5848 } 5849} 5850 5851let Uses = [MXCSR] in 5852multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd, 5853 X86SchedWriteSizes sched> { 5854 let Predicates = [HasFP16] in { 5855 defm PHZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, 
sched.PH.ZMM, 5856 v32f16_info>, 5857 EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>; 5858 } 5859 defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM, 5860 v16f32_info>, 5861 EVEX_V512, PS, EVEX_CD8<32, CD8VF>; 5862 defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM, 5863 v8f64_info>, 5864 EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>; 5865} 5866 5867let Uses = [MXCSR] in 5868multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd, 5869 X86SchedWriteSizes sched> { 5870 let Predicates = [HasFP16] in { 5871 defm PHZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PH.ZMM, 5872 v32f16_info>, 5873 EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>; 5874 } 5875 defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM, 5876 v16f32_info>, 5877 EVEX_V512, PS, EVEX_CD8<32, CD8VF>; 5878 defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM, 5879 v8f64_info>, 5880 EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>; 5881} 5882 5883defm VADD : avx512_fp_binop_p<0x58, "vadd", any_fadd, fadd, HasAVX512, 5884 SchedWriteFAddSizes, 1>, 5885 avx512_fp_binop_ph<0x58, "vadd", any_fadd, fadd, SchedWriteFAddSizes, 1>, 5886 avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>; 5887defm VMUL : avx512_fp_binop_p<0x59, "vmul", any_fmul, fmul, HasAVX512, 5888 SchedWriteFMulSizes, 1>, 5889 avx512_fp_binop_ph<0x59, "vmul", any_fmul, fmul, SchedWriteFMulSizes, 1>, 5890 avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>; 5891defm VSUB : avx512_fp_binop_p<0x5C, "vsub", any_fsub, fsub, HasAVX512, 5892 SchedWriteFAddSizes>, 5893 avx512_fp_binop_ph<0x5C, "vsub", any_fsub, fsub, SchedWriteFAddSizes>, 5894 avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>; 5895defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", any_fdiv, fdiv, HasAVX512, 5896 SchedWriteFDivSizes>, 5897 avx512_fp_binop_ph<0x5E, "vdiv", any_fdiv, fdiv, SchedWriteFDivSizes>, 5898 avx512_fp_binop_p_round<0x5E, 
"vdiv", X86fdivRnd, SchedWriteFDivSizes>; 5899defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, X86fmin, HasAVX512, 5900 SchedWriteFCmpSizes, 0>, 5901 avx512_fp_binop_ph<0x5D, "vmin", X86fmin, X86fmin, SchedWriteFCmpSizes, 0>, 5902 avx512_fp_binop_p_sae<0x5D, "vmin", X86fminSAE, SchedWriteFCmpSizes>; 5903defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, X86fmax, HasAVX512, 5904 SchedWriteFCmpSizes, 0>, 5905 avx512_fp_binop_ph<0x5F, "vmax", X86fmax, X86fmax, SchedWriteFCmpSizes, 0>, 5906 avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxSAE, SchedWriteFCmpSizes>; 5907let isCodeGenOnly = 1 in { 5908 defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, X86fminc, HasAVX512, 5909 SchedWriteFCmpSizes, 1>, 5910 avx512_fp_binop_ph<0x5D, "vmin", X86fminc, X86fminc, 5911 SchedWriteFCmpSizes, 1>; 5912 defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, X86fmaxc, HasAVX512, 5913 SchedWriteFCmpSizes, 1>, 5914 avx512_fp_binop_ph<0x5F, "vmax", X86fmaxc, X86fmaxc, 5915 SchedWriteFCmpSizes, 1>; 5916} 5917let Uses = []<Register>, mayRaiseFPException = 0 in { 5918defm VAND : avx512_fp_binop_p<0x54, "vand", null_frag, null_frag, HasDQI, 5919 SchedWriteFLogicSizes, 1>; 5920defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, null_frag, HasDQI, 5921 SchedWriteFLogicSizes, 0>; 5922defm VOR : avx512_fp_binop_p<0x56, "vor", null_frag, null_frag, HasDQI, 5923 SchedWriteFLogicSizes, 1>; 5924defm VXOR : avx512_fp_binop_p<0x57, "vxor", null_frag, null_frag, HasDQI, 5925 SchedWriteFLogicSizes, 1>; 5926} 5927 5928multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode, 5929 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 5930 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in { 5931 defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 5932 (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix, 5933 "$src2, $src1", "$src1, $src2", 5934 (_.VT (OpNode _.RC:$src1, _.RC:$src2))>, 5935 EVEX_4V, Sched<[sched]>; 5936 defm rm: AVX512_maskable<opc, 
MRMSrcMem, _, (outs _.RC:$dst), 5937 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#_.Suffix, 5938 "$src2, $src1", "$src1, $src2", 5939 (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>, 5940 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>; 5941 defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 5942 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#_.Suffix, 5943 "${src2}"#_.BroadcastStr#", $src1", 5944 "$src1, ${src2}"#_.BroadcastStr, 5945 (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>, 5946 EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 5947 } 5948} 5949 5950multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode, 5951 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 5952 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in { 5953 defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 5954 (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix, 5955 "$src2, $src1", "$src1, $src2", 5956 (_.VT (OpNode _.RC:$src1, _.RC:$src2))>, 5957 Sched<[sched]>; 5958 defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), 5959 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr#_.Suffix, 5960 "$src2, $src1", "$src1, $src2", 5961 (OpNode _.RC:$src1, (_.ScalarIntMemFrags addr:$src2))>, 5962 Sched<[sched.Folded, sched.ReadAfterFold]>; 5963 } 5964} 5965 5966multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr, 5967 X86SchedWriteWidths sched> { 5968 let Predicates = [HasFP16] in { 5969 defm PHZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v32f16_info>, 5970 avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v32f16_info>, 5971 EVEX_V512, T_MAP6PD, EVEX_CD8<16, CD8VF>; 5972 defm SHZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f16x_info>, 5973 avx512_fp_scalar_round<opcScaler, OpcodeStr#"sh", f16x_info, X86scalefsRnd, sched.Scl>, 5974 EVEX_4V, T_MAP6PD, EVEX_CD8<16, CD8VT1>; 5975 } 5976 defm PSZ : 
avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v16f32_info>, 5977 avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v16f32_info>, 5978 EVEX_V512, EVEX_CD8<32, CD8VF>, T8PD; 5979 defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v8f64_info>, 5980 avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v8f64_info>, 5981 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>, T8PD; 5982 defm SSZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f32x_info>, 5983 avx512_fp_scalar_round<opcScaler, OpcodeStr#"ss", f32x_info, 5984 X86scalefsRnd, sched.Scl>, 5985 EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, T8PD; 5986 defm SDZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f64x_info>, 5987 avx512_fp_scalar_round<opcScaler, OpcodeStr#"sd", f64x_info, 5988 X86scalefsRnd, sched.Scl>, 5989 EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>, VEX_W, T8PD; 5990 5991 // Define only if AVX512VL feature is present. 5992 let Predicates = [HasVLX] in { 5993 defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v4f32x_info>, 5994 EVEX_V128, EVEX_CD8<32, CD8VF>, T8PD; 5995 defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v8f32x_info>, 5996 EVEX_V256, EVEX_CD8<32, CD8VF>, T8PD; 5997 defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v2f64x_info>, 5998 EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>, T8PD; 5999 defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v4f64x_info>, 6000 EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>, T8PD; 6001 } 6002 6003 let Predicates = [HasFP16, HasVLX] in { 6004 defm PHZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v8f16x_info>, 6005 EVEX_V128, EVEX_CD8<16, CD8VF>, T_MAP6PD; 6006 defm PHZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v16f16x_info>, 6007 EVEX_V256, EVEX_CD8<16, CD8VF>, T_MAP6PD; 6008 } 6009} 6010defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", 6011 SchedWriteFAdd>, 
NotEVEX2VEXConvertible; 6012 6013//===----------------------------------------------------------------------===// 6014// AVX-512 VPTESTM instructions 6015//===----------------------------------------------------------------------===// 6016 6017multiclass avx512_vptest<bits<8> opc, string OpcodeStr, 6018 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 6019 // NOTE: Patterns are omitted in favor of manual selection in X86ISelDAGToDAG. 6020 // There are just too many permutations due to commutability and bitcasts. 6021 let ExeDomain = _.ExeDomain, hasSideEffects = 0 in { 6022 defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst), 6023 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 6024 "$src2, $src1", "$src1, $src2", 6025 (null_frag), (null_frag), 1>, 6026 EVEX_4V, Sched<[sched]>; 6027 let mayLoad = 1 in 6028 defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst), 6029 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr, 6030 "$src2, $src1", "$src1, $src2", 6031 (null_frag), (null_frag)>, 6032 EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, 6033 Sched<[sched.Folded, sched.ReadAfterFold]>; 6034 } 6035} 6036 6037multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr, 6038 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 6039 let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in 6040 defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst), 6041 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr, 6042 "${src2}"#_.BroadcastStr#", $src1", 6043 "$src1, ${src2}"#_.BroadcastStr, 6044 (null_frag), (null_frag)>, 6045 EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, 6046 Sched<[sched.Folded, sched.ReadAfterFold]>; 6047} 6048 6049multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr, 6050 X86SchedWriteWidths sched, 6051 AVX512VLVectorVTInfo _> { 6052 let Predicates = [HasAVX512] in 6053 defm Z : avx512_vptest<opc, OpcodeStr, sched.ZMM, _.info512>, 6054 avx512_vptest_mb<opc, OpcodeStr, sched.ZMM, _.info512>, EVEX_V512; 6055 6056 let Predicates = 
[HasAVX512, HasVLX] in { 6057 defm Z256 : avx512_vptest<opc, OpcodeStr, sched.YMM, _.info256>, 6058 avx512_vptest_mb<opc, OpcodeStr, sched.YMM, _.info256>, EVEX_V256; 6059 defm Z128 : avx512_vptest<opc, OpcodeStr, sched.XMM, _.info128>, 6060 avx512_vptest_mb<opc, OpcodeStr, sched.XMM, _.info128>, EVEX_V128; 6061 } 6062} 6063 6064multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr, 6065 X86SchedWriteWidths sched> { 6066 defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", sched, 6067 avx512vl_i32_info>; 6068 defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", sched, 6069 avx512vl_i64_info>, VEX_W; 6070} 6071 6072multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr, 6073 X86SchedWriteWidths sched> { 6074 let Predicates = [HasBWI] in { 6075 defm WZ: avx512_vptest<opc, OpcodeStr#"w", sched.ZMM, 6076 v32i16_info>, EVEX_V512, VEX_W; 6077 defm BZ: avx512_vptest<opc, OpcodeStr#"b", sched.ZMM, 6078 v64i8_info>, EVEX_V512; 6079 } 6080 6081 let Predicates = [HasVLX, HasBWI] in { 6082 defm WZ256: avx512_vptest<opc, OpcodeStr#"w", sched.YMM, 6083 v16i16x_info>, EVEX_V256, VEX_W; 6084 defm WZ128: avx512_vptest<opc, OpcodeStr#"w", sched.XMM, 6085 v8i16x_info>, EVEX_V128, VEX_W; 6086 defm BZ256: avx512_vptest<opc, OpcodeStr#"b", sched.YMM, 6087 v32i8x_info>, EVEX_V256; 6088 defm BZ128: avx512_vptest<opc, OpcodeStr#"b", sched.XMM, 6089 v16i8x_info>, EVEX_V128; 6090 } 6091} 6092 6093multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr, 6094 X86SchedWriteWidths sched> : 6095 avx512_vptest_wb<opc_wb, OpcodeStr, sched>, 6096 avx512_vptest_dq<opc_dq, OpcodeStr, sched>; 6097 6098defm VPTESTM : avx512_vptest_all_forms<0x26, 0x27, "vptestm", 6099 SchedWriteVecLogic>, T8PD; 6100defm VPTESTNM : avx512_vptest_all_forms<0x26, 0x27, "vptestnm", 6101 SchedWriteVecLogic>, T8XS; 6102 6103//===----------------------------------------------------------------------===// 6104// AVX-512 Shift instructions 
//===----------------------------------------------------------------------===//

// Instruction pair taking one vector source and an 8-bit immediate, for the
// vector type described by `_`:
//   ri - register source, encoded with ImmFormR.
//   mi - full-vector memory source (loaded through _.LdFrag), encoded with
//        ImmFormM.
// Both forms take the immediate as a u8imm operand and select
// (OpNode src, (i8 timm)).
multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
                            string OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, u8imm:$src2), OpcodeStr,
                      "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1, (i8 timm:$src2)))>,
                   Sched<[sched]>;
  defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
                   (ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
                       "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode (_.VT (_.LdFrag addr:$src1)),
                          (i8 timm:$src2)))>,
                   Sched<[sched.Folded]>;
  }
}

// Embedded-broadcast form that accompanies avx512_shift_rmi: mbi takes a
// scalar memory operand (_.ScalarMemOp), matches it through
// _.BroadcastLdFrag, prints it with _.BroadcastStr and is tagged EVEX_B.
multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
                             string OpcodeStr, SDNode OpNode,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
                   (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
      "$src2, ${src1}"#_.BroadcastStr, "${src1}"#_.BroadcastStr#", $src2",
     (_.VT (OpNode (_.BroadcastLdFrag addr:$src1), (i8 timm:$src2)))>,
     EVEX_B, Sched<[sched.Folded]>;
}

// Forms whose second operand is a 128-bit vector regardless of the width of
// `_`: rr takes it in VR128X, rm loads it from i128mem. SrcVT is the value
// type given to that second operand in the selection pattern.
multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, ValueType SrcVT,
                            X86VectorVTInfo _> {
   // src2 is always 128-bit
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, VR128X:$src2), OpcodeStr,
                      "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2)))>,
                   AVX512BIBase, EVEX_4V, Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, i128mem:$src2), OpcodeStr,
                       "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1, (SrcVT (load addr:$src2))))>,
                   AVX512BIBase,
                   EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Instantiates avx512_shift_rrm at 512, 256 and 128 bits. The 512-bit form is
// guarded by `prd`; the 256/128-bit forms additionally require HasVLX.
// The EVEX_CD8 tuple changes with the width (CD8VQ / CD8VH / CD8VF).
// NOTE(review): presumably because the memory operand stays 128 bits wide
// while the vector width changes - confirm against the CD8 definitions.
multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86SchedWriteWidths sched, ValueType SrcVT,
                              AVX512VLVectorVTInfo VTInfo,
                              Predicate prd> {
  let Predicates = [prd] in
  defm Z    : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.ZMM, SrcVT,
                               VTInfo.info512>, EVEX_V512,
                               EVEX_CD8<VTInfo.info512.EltSize, CD8VQ> ;
  let Predicates = [prd, HasVLX] in {
  defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.YMM, SrcVT,
                               VTInfo.info256>, EVEX_V256,
                               EVEX_CD8<VTInfo.info256.EltSize, CD8VH>;
  defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.XMM, SrcVT,
                               VTInfo.info128>, EVEX_V128,
                               EVEX_CD8<VTInfo.info128.EltSize, CD8VF>;
  }
}

// Dword (D), qword (Q) and word (W) variants of avx512_shift_sizes, each with
// its own opcode. Q adds VEX_W and has its notEVEX2VEXConvertible field set
// from NotEVEX2VEXConvertibleQ; W is guarded by HasBWI instead of HasAVX512.
multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
                              string OpcodeStr, SDNode OpNode,
                              X86SchedWriteWidths sched,
                              bit NotEVEX2VEXConvertibleQ = 0> {
  defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, sched, v4i32,
                              avx512vl_i32_info, HasAVX512>;
  let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
  defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, sched, v2i64,
                              avx512vl_i64_info, HasAVX512>, VEX_W;
  defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, sched, v8i16,
                              avx512vl_i16_info, HasBWI>;
}

// Instantiates the immediate forms (avx512_shift_rmi) together with the
// broadcast form (avx512_shift_rmbi) at 512 bits under HasAVX512 and at
// 256/128 bits under HasAVX512 + HasVLX.
multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
                                  string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched,
                                  AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasAVX512] in
  defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              sched.ZMM, VTInfo.info512>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.ZMM,
                               VTInfo.info512>, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in {
  defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              sched.YMM, VTInfo.info256>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.YMM,
                               VTInfo.info256>, EVEX_V256;
  defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              sched.XMM, VTInfo.info128>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.XMM,
                               VTInfo.info128>, EVEX_V128;
  }
}

// Word-element immediate forms. Only avx512_shift_rmi is instantiated (no
// broadcast form), guarded by HasBWI (plus HasVLX for 256/128 bits) and
// tagged VEX_WIG.
multiclass avx512_shift_rmi_w<bits<8> opcw, Format ImmFormR, Format ImmFormM,
                              string OpcodeStr, SDNode OpNode,
                              X86SchedWriteWidths sched> {
  let Predicates = [HasBWI] in
  defm WZ:    avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                               sched.ZMM, v32i16_info>, EVEX_V512, VEX_WIG;
  let Predicates = [HasVLX, HasBWI] in {
  defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                               sched.YMM, v16i16x_info>, EVEX_V256, VEX_WIG;
  defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                               sched.XMM, v8i16x_info>, EVEX_V128, VEX_WIG;
  }
}

// Dword (D) and qword (Q) immediate forms built on avx512_shift_rmi_sizes.
// Q adds VEX_W and has its notEVEX2VEXConvertible field set from
// NotEVEX2VEXConvertibleQ.
multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq,
                               Format ImmFormR, Format ImmFormM,
                               string OpcodeStr, SDNode OpNode,
                               X86SchedWriteWidths sched,
                               bit NotEVEX2VEXConvertibleQ = 0> {
  defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode,
                                 sched, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
  let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
  defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode,
                                 sched, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
}

// Shifts by immediate (D/Q forms plus the word form).
defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli,
                                 SchedWriteVecShiftImm>,
             avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli,
                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;

defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli,
                                 SchedWriteVecShiftImm>,
             avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli,
                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;

// VPSRA passes 1 for NotEVEX2VEXConvertibleQ, so only its Q forms are marked
// notEVEX2VEXConvertible.
defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai,
                                 SchedWriteVecShiftImm, 1>,
             avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai,
                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;

// Rotates by immediate; only D/Q forms are defined here.
defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri,
                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli,
                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;

// Shifts whose count comes from a 128-bit register or memory operand.
defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl,
                                SchedWriteVecShift>;
defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra,
                                SchedWriteVecShift, 1>;
defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl,
                                SchedWriteVecShift>;

// Use 512bit VPSRA/VPSRAI version to implement v2i64/v4i64 in case NoVLX.
// Each pattern inserts the narrow source into an IMPLICIT_DEF v8i64, runs the
// 512-bit instruction and extracts the original-width subregister.
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                 VR128X:$src2)), sub_ymm)>;

  def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                 VR128X:$src2)), sub_xmm)>;

  def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                 timm:$src2)), sub_ymm)>;

  def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                 timm:$src2)), sub_xmm)>;
}

//===-------------------------------------------------------------------===//
// Variable Bit Shifts
//===-------------------------------------------------------------------===//

// Two-source instruction pair where both sources have the vector type of `_`:
//   rr - second source in a register.
//   rm - second source loaded as a full vector through _.LdFrag.
// Both select (OpNode src1, src2).
multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                      "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2)))>,
                   AVX5128IBase, EVEX_4V, Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                       "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1,
                   (_.VT (_.LdFrag addr:$src2))))>,
                   AVX5128IBase, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Embedded-broadcast form that accompanies avx512_var_shift: rmb takes a
// scalar memory operand, matches it through _.BroadcastLdFrag and is tagged
// EVEX_B.
multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                    "${src2}"#_.BroadcastStr#", $src1",
                    "$src1, ${src2}"#_.BroadcastStr,
                    (_.VT (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))>,
                    AVX5128IBase, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// Instantiates avx512_var_shift plus avx512_var_shift_mb at 512 bits under
// HasAVX512 and at 256/128 bits under HasAVX512 + HasVLX.
multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
           avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
  defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256;
  defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128;
  }
}

// Dword (D) and qword (Q) variants sharing one opcode; Q adds VEX_W.
multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr,
                                  SDNode OpNode, X86SchedWriteWidths sched> {
  defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode, sched,
                                 avx512vl_i32_info>;
  defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode, sched,
                                 avx512vl_i64_info>, VEX_W;
}

// Use 512bit version to implement 128/256 bit in case NoVLX.
// OpcodeStr is the instruction name prefix; the patterns look up the record
// named OpcodeStr#"Zrr". Both sources are inserted into IMPLICIT_DEF 512-bit
// values and the original-width subregister of the result is extracted.
multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr,
                                     SDNode OpNode, list<Predicate> p> {
  let Predicates = p in {
  def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1),
                                  (_.info256.VT _.info256.RC:$src2))),
            (EXTRACT_SUBREG
                (!cast<Instruction>(OpcodeStr#"Zrr")
                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
             sub_ymm)>;

  def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1),
                                  (_.info128.VT _.info128.RC:$src2))),
            (EXTRACT_SUBREG
                (!cast<Instruction>(OpcodeStr#"Zrr")
                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
             sub_xmm)>;
  }
}
// Word-element variants. Only avx512_var_shift is instantiated (no broadcast
// form), guarded by HasBWI (plus HasVLX for 256/128 bits) and tagged VEX_W.
multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
                              SDNode OpNode, X86SchedWriteWidths sched> {
  let Predicates = [HasBWI] in
  defm WZ:    avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v32i16_info>,
              EVEX_V512, VEX_W;
  let Predicates = [HasVLX, HasBWI] in {

  defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v16i16x_info>,
              EVEX_V256, VEX_W;
  defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v8i16x_info>,
              EVEX_V128, VEX_W;
  }
}

// Per-element variable shifts: D/Q forms plus a word form with its own opcode.
defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", X86vshlv, SchedWriteVarVecShift>,
              avx512_var_shift_w<0x12, "vpsllvw", X86vshlv, SchedWriteVarVecShift>;

defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", X86vsrav, SchedWriteVarVecShift>,
              avx512_var_shift_w<0x11, "vpsravw", X86vsrav, SchedWriteVarVecShift>;

defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", X86vsrlv, SchedWriteVarVecShift>,
              avx512_var_shift_w<0x10, "vpsrlvw", X86vsrlv, SchedWriteVarVecShift>;

// Per-element variable rotates, selected from the generic rotr/rotl nodes;
// only D/Q forms are defined here.
defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SchedWriteVarVecShift>;
defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SchedWriteVarVecShift>;

// 128/256-bit lowering through the 512-bit instruction when VLX is missing.
defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", X86vsrav, [HasAVX512, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", X86vshlv, [HasBWI, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", X86vsrav, [HasBWI, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", X86vsrlv, [HasBWI, NoVLX]>;


// Use 512bit VPROL/VPROLI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
let Predicates = [HasAVX512, NoVLX] in {
  // Left rotates on 128/256-bit vectors, selected to the 512-bit
  // instruction: sources are inserted into an IMPLICIT_DEF 512-bit value and
  // the original-width subregister of the result is extracted.
  // Variable-count rotl, qword elements.
  def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                       (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
                       sub_xmm)>;
  def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                       (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
                       sub_ymm)>;

  // Variable-count rotl, dword elements.
  def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPROLVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                        (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
                        sub_xmm)>;
  def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPROLVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                        (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
                        sub_ymm)>;

  // Immediate-count rotl, qword elements.
  def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                        timm:$src2)), sub_xmm)>;
  def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                       timm:$src2)), sub_ymm)>;

  // Immediate-count rotl, dword elements.
  def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPROLDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                        timm:$src2)), sub_xmm)>;
  def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPROLDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                        timm:$src2)), sub_ymm)>;
}

// Use 512bit VPROR/VPRORI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
let Predicates = [HasAVX512, NoVLX] in {
  // Variable-count rotr, qword elements.
  def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPRORVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                       (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
                       sub_xmm)>;
  def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPRORVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                       (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
                       sub_ymm)>;

  // Variable-count rotr, dword elements.
  def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPRORVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                        (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
                        sub_xmm)>;
  def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPRORVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                        (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
                        sub_ymm)>;

  // Immediate-count rotr, qword elements.
  def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPRORQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                        timm:$src2)), sub_xmm)>;
  def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPRORQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                       timm:$src2)), sub_ymm)>;

  // Immediate-count rotr, dword elements.
  def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPRORDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                        timm:$src2)), sub_xmm)>;
  def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPRORDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                        timm:$src2)), sub_ymm)>;
}

//===-------------------------------------------------------------------===//
// 1-src variable permutation VPERMW/D/Q
//===-------------------------------------------------------------------===//

// 512- and 256-bit permutes built from the rr/rm/rmb forms of
// avx512_var_shift and avx512_var_shift_mb. No 128-bit form is defined, and
// the same `sched` is used for both widths.
multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
  let Predicates  = [HasAVX512] in
  defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
           avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in
  defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info256>, EVEX_V256;
}

// 512- and 256-bit immediate-controlled permutes built from the ri/mi/mbi
// forms of avx512_shift_rmi and avx512_shift_rmbi. No 128-bit form is
// defined.
multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
                                 string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasAVX512] in
  defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              sched, VTInfo.info512>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
                               sched, VTInfo.info512>, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in
  defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              sched, VTInfo.info256>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
                               sched, VTInfo.info256>, EVEX_V256;
}

// Byte/word permutes at 512, 256 and 128 bits, guarded by the feature
// predicate `prd` (plus HasVLX below 512 bits). Only the rr/rm forms of
// avx512_var_shift are instantiated (no broadcast form).
multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr,
                              Predicate prd, SDNode OpNode,
                              X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
  let Predicates = [prd] in
  defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
              EVEX_V512 ;
  let Predicates = [HasVLX, prd] in {
  defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
              EVEX_V256 ;
  defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info128>,
              EVEX_V128 ;
  }
}

// VPERMW and VPERMB share opcode 0x8D; VPERMW adds VEX_W.
defm VPERMW  : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv,
                               WriteVarShuffle256, avx512vl_i16_info>, VEX_W;
defm VPERMB  : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv,
                               WriteVarShuffle256, avx512vl_i8_info>;

// Vector-controlled permutes (second source is a vector of type `_`).
defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv,
                                    WriteVarShuffle256, avx512vl_i32_info>;
defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv,
                                    WriteVarShuffle256, avx512vl_i64_info>, VEX_W;
defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv,
                                     WriteFVarShuffle256, avx512vl_f32_info>;
defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv,
                                     WriteFVarShuffle256, avx512vl_f64_info>, VEX_W;

// Immediate-controlled permutes (second operand is a u8imm).
defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq",
                             X86VPermi, WriteShuffle256, avx512vl_i64_info>,
                             EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd",
                             X86VPermi, WriteFShuffle256, avx512vl_f64_info>,
                             EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;

//===----------------------------------------------------------------------===//
// AVX-512 - VPERMIL
//===----------------------------------------------------------------------===//

// Vector-controlled VPERMIL forms. `_` describes the data vector and `Ctrl`
// the control vector: the register class, value type, memory operand and load
// fragments of the second source all come from Ctrl, while the rmb form's
// scalar memory operand and broadcast string come from `_`.
multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _,
                             X86VectorVTInfo Ctrl> {
  defm rr: AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, Ctrl.RC:$src2), OpcodeStr,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1,
                               (Ctrl.VT Ctrl.RC:$src2)))>,
                  T8PD, EVEX_4V, Sched<[sched]>;
  defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode
                           _.RC:$src1,
                           (Ctrl.VT (Ctrl.LdFrag addr:$src2))))>,
                  T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                   "${src2}"#_.BroadcastStr#", $src1",
                   "$src1, ${src2}"#_.BroadcastStr,
                   (_.VT (OpNode
                            _.RC:$src1,
                            (Ctrl.VT (Ctrl.BroadcastLdFrag addr:$src2))))>,
                   T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// Instantiates avx512_permil_vec with X86VPermilpv at 512 bits under
// HasAVX512 and at 128/256 bits under HasAVX512 + HasVLX.
multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
                                    X86SchedWriteWidths sched,
                                    AVX512VLVectorVTInfo _,
                                    AVX512VLVectorVTInfo Ctrl> {
  let Predicates = [HasAVX512] in {
    defm Z    : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.ZMM,
                                  _.info512, Ctrl.info512>, EVEX_V512;
  }
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.XMM,
                                  _.info128, Ctrl.info128>, EVEX_V128;
    defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.YMM,
                                  _.info256, Ctrl.info256>, EVEX_V256;
  }
}

// Complete VPERMIL family under one NAME: the vector-controlled forms (opcode
// OpcVar, X86VPermilpv) and the immediate-controlled forms (opcode OpcImm,
// X86VPermilpi, built on avx512_shift_rmi_sizes).
multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
                         AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{
  defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, SchedWriteFVarShuffle,
                                      _, Ctrl>;
  defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
                                    X86VPermilpi, SchedWriteFShuffle, _>,
                    EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
}

// The control vector type is the integer type with the same element size as
// the data type (i32 for f32, i64 for f64).
let ExeDomain = SSEPackedSingle in
defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info,
                               avx512vl_i32_info>;
let ExeDomain = SSEPackedDouble in
defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
                               avx512vl_i64_info>, VEX_W1X;

//===----------------------------------------------------------------------===//
// AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW
//===----------------------------------------------------------------------===//

// Immediate-controlled shuffles, built on the immediate-form multiclasses
// avx512_shift_rmi_sizes (dword, with broadcast form) and avx512_shift_rmi_w
// (word, no broadcast form). All three use opcode 0x70 and differ in their
// base class (AVX512BIi8Base / AVX512XSIi8Base / AVX512XDIi8Base).
defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
                             X86PShufd, SchedWriteShuffle, avx512vl_i32_info>,
                             EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;
defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
                                  X86PShufhw, SchedWriteShuffle>,
                                  EVEX, AVX512XSIi8Base;
defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
                                  X86PShuflw, SchedWriteShuffle>,
                                  EVEX, AVX512XDIi8Base;

//===----------------------------------------------------------------------===//
// AVX-512 - VPSHUFB
//===----------------------------------------------------------------------===//

// Byte shuffle at 512 bits under HasBWI and at 256/128 bits under
// HasVLX + HasBWI, using the rr/rm forms of avx512_var_shift (no broadcast
// form).
multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86SchedWriteWidths sched> {
  let Predicates = [HasBWI] in
  defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v64i8_info>,
                              EVEX_V512;

  let Predicates = [HasVLX, HasBWI] in {
  defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v32i8x_info>,
                              EVEX_V256;
  defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v16i8x_info>,
                              EVEX_V128;
  }
}

defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb,
                                  SchedWriteVarShuffle>, VEX_WIG;

//===----------------------------------------------------------------------===//
// Move Low to High and High to Low packed FP Instructions
//===----------------------------------------------------------------------===//

// Register-register forms on 128-bit registers, selected from X86Movlhps and
// X86Movhlps on v4f32. Only VMOVHLPSZrr is marked isCommutable and
// NotMemoryFoldable.
def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
          (ins VR128X:$src1, VR128X:$src2),
          "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
          [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))]>,
          Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V;
let isCommutable = 1 in
def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
          (ins VR128X:$src1, VR128X:$src2),
          "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
          [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))]>,
          Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V, NotMemoryFoldable;

//===----------------------------------------------------------------------===//
// VMOVHPS/PD VMOVLPS Instructions
// All patterns were taken from the SSE implementation.
//===----------------------------------------------------------------------===//

// Load form (rm) combining a register source with a 64-bit memory operand
// (f64mem). The pattern loads an f64, places it in a v2f64 with
// scalar_to_vector, bitconverts it to _.VT and applies OpNode; callers pass
// null_frag as OpNode when no pattern is wanted.
multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr,
                                  SDPatternOperator OpNode,
                                  X86VectorVTInfo _> {
  let hasSideEffects = 0, mayLoad = 1, ExeDomain = _.ExeDomain in
  def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst),
                  (ins _.RC:$src1, f64mem:$src2),
                  !strconcat(OpcodeStr,
                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set _.RC:$dst,
                     (OpNode _.RC:$src1,
                       (_.VT (bitconvert
                         (v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))]>,
                  Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>, EVEX_4V;
}

// No patterns for MOVLPS/MOVHPS as the Movlhps node should only be created in
// SSE1. And MOVLPS pattern is even more complex.
// Load forms of VMOVHPS/VMOVHPD/VMOVLPS/VMOVLPD. The PS variants pass
// null_frag (no selection pattern); the PD variants select on
// X86Unpckl / X86Movsd respectively.
defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", null_frag,
                                  v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl,
                                  v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", null_frag,
                                  v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movsd,
                                  v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;

// Extra load patterns for the PD forms when the memory operand appears as an
// X86vzload64 instead of a scalar_to_vector of a plain f64 load.
let Predicates = [HasAVX512] in {
  // VMOVHPD patterns
  def : Pat<(v2f64 (X86Unpckl VR128X:$src1, (X86vzload64 addr:$src2))),
            (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;

  // VMOVLPD patterns
  def : Pat<(v2f64 (X86Movsd VR128X:$src1, (X86vzload64 addr:$src2))),
            (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
}

// Store forms. The PS stores carry no pattern (and therefore need explicit
// mayStore/hasSideEffects); the PD stores select on a store of an extracted
// f64 element.
let SchedRW = [WriteFStore] in {
let mayStore = 1, hasSideEffects = 0 in
def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs),
                       (ins f64mem:$dst, VR128X:$src),
                       "vmovhps\t{$src, $dst|$dst, $src}",
                       []>, EVEX, EVEX_CD8<32, CD8VT2>;
def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
                       (ins f64mem:$dst, VR128X:$src),
                       "vmovhpd\t{$src, $dst|$dst, $src}",
                       [(store (f64 (extractelt
                                     (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)),
                                     (iPTR 0))), addr:$dst)]>,
                       EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
let mayStore = 1, hasSideEffects = 0 in
def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs),
                       (ins f64mem:$dst, VR128X:$src),
                       "vmovlps\t{$src, $dst|$dst, $src}",
                       []>, EVEX, EVEX_CD8<32, CD8VT2>;
def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs),
                       (ins f64mem:$dst, VR128X:$src),
                       "vmovlpd\t{$src, $dst|$dst, $src}",
                       [(store (f64 (extractelt (v2f64 VR128X:$src),
                                     (iPTR 0))), addr:$dst)]>,
                       EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
} // SchedRW

let Predicates = [HasAVX512] in {
  // VMOVHPD patterns
  def : Pat<(store (f64 (extractelt
                           (v2f64 (X86VPermilpi VR128X:$src, (i8 1))),
                           (iPTR 0))), addr:$dst),
           (VMOVHPDZ128mr addr:$dst, VR128X:$src)>;
}
//===----------------------------------------------------------------------===//
// FMA - Fused Multiply Operations
//

// Packed FMA, 213 operand order: the node is applied to ($src2, $src1, $src3)
// with $src1 tied to $dst. Defines the register (r), full-vector load (m) and
// embedded-broadcast load (mb) forms. OpNode is used for the unmasked pattern
// and MaskOpNode for the masked ones.
multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                               SDNode MaskOpNode, X86FoldableSchedWrite sched,
                               X86VectorVTInfo _> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
      Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)),
          (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>,
          EVEX_4V, Sched<[sched]>;

  defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))),
          (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>,
          EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
            (ins _.RC:$src2, _.ScalarMemOp:$src3),
            OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
            !strconcat("$src2, ${src3}", _.BroadcastStr ),
            (OpNode _.RC:$src2,
             _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))),
            (MaskOpNode _.RC:$src2,
             _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))), 1, 0>,
            EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Packed FMA, 213 order, register form with an explicit rounding-control
// operand ($rc). The same node is used for masked and unmasked patterns.
multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched,
                                 X86VectorVTInfo _> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
      Uses = [MXCSR] in
  defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
          (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))),
          (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))), 1, 1>,
          EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
}

// Instantiates the 213 forms for all three vector widths. Only the 512-bit
// variant also gets the rounding-control form; the 256/128-bit variants
// additionally require HasVLX.
multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                                   SDNode MaskOpNode, SDNode OpNodeRnd,
                                   X86SchedWriteWidths sched,
                                   AVX512VLVectorVTInfo _,
                                   Predicate prd = HasAVX512> {
  let Predicates = [prd] in {
    defm Z      : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                      sched.ZMM, _.info512>,
                  avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
                                        _.info512>,
                              EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, prd] in {
    defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.YMM, _.info256>,
                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.XMM, _.info128>,
                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}

// Element-type fan-out for the 213 forms: PH (f16, gated on HasFP16),
// PS (f32) and PD (f64).
multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                              SDNode MaskOpNode, SDNode OpNodeRnd> {
    defm PH : avx512_fma3p_213_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
                                      OpNodeRnd, SchedWriteFMA,
                                      avx512vl_f16_info, HasFP16>, T_MAP6PD;
    defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
                                      OpNodeRnd, SchedWriteFMA,
                                      avx512vl_f32_info>, T8PD;
    defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
                                      OpNodeRnd, SchedWriteFMA,
                                      avx512vl_f64_info>, T8PD, VEX_W;
}

defm VFMADD213    : avx512_fma3p_213_f<0xA8, "vfmadd213", any_fma,
                                       fma, X86FmaddRnd>;
defm VFMSUB213    : avx512_fma3p_213_f<0xAA, "vfmsub213", X86any_Fmsub,
                                       X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub,
                                       X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd,
                                       X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD213   : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86any_Fnmadd,
                                       X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB213   : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86any_Fnmsub,
                                       X86Fnmsub, X86FnmsubRnd>;


// Packed FMA, 231 operand order: the node is applied to ($src2, $src3, $src1)
// with $src1 tied to $dst. The register form has no unmasked pattern
// (null_frag); the load and broadcast forms have both.
multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                               SDNode MaskOpNode, X86FoldableSchedWrite sched,
                               X86VectorVTInfo _> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
      Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (null_frag),
          (_.VT (MaskOpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
          EVEX_4V, Sched<[sched]>;

  defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)),
          (_.VT (MaskOpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>,
          EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
         (ins _.RC:$src2, _.ScalarMemOp:$src3),
         OpcodeStr, "${src3}"#_.BroadcastStr#", $src2",
         "$src2, ${src3}"#_.BroadcastStr,
         (_.VT (OpNode _.RC:$src2,
                      (_.VT (_.BroadcastLdFrag addr:$src3)),
                      _.RC:$src1)),
         (_.VT (MaskOpNode _.RC:$src2,
                           (_.VT (_.BroadcastLdFrag addr:$src3)),
                           _.RC:$src1)), 1, 0>, EVEX_4V, EVEX_B,
         Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Packed FMA, 231 order, register form with rounding control. Only the masked
// pattern is provided (the unmasked one is null_frag).
multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched,
                                 X86VectorVTInfo _> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
      Uses = [MXCSR] in
  defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
          (null_frag),
          (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc))),
          1, 1>, EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
}

// Width fan-out for the 231 forms; mirrors avx512_fma3p_213_common.
multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                                   SDNode MaskOpNode, SDNode OpNodeRnd,
                                   X86SchedWriteWidths sched,
                                   AVX512VLVectorVTInfo _,
                                   Predicate prd = HasAVX512> {
  let Predicates = [prd] in {
    defm Z      : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                      sched.ZMM, _.info512>,
                  avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
                                        _.info512>,
                              EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, prd] in {
    defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.YMM, _.info256>,
                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.XMM, _.info128>,
                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}

// Element-type fan-out for the 231 forms (PH gated on HasFP16, PS, PD).
multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                              SDNode MaskOpNode, SDNode OpNodeRnd > {
    defm PH : avx512_fma3p_231_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
                                      OpNodeRnd, SchedWriteFMA,
                                      avx512vl_f16_info, HasFP16>, T_MAP6PD;
    defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
                                      OpNodeRnd, SchedWriteFMA,
                                      avx512vl_f32_info>, T8PD;
    defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
                                      OpNodeRnd, SchedWriteFMA,
                                      avx512vl_f64_info>, T8PD, VEX_W;
}

defm VFMADD231    : avx512_fma3p_231_f<0xB8, "vfmadd231", any_fma,
                                       fma, X86FmaddRnd>;
defm VFMSUB231    : avx512_fma3p_231_f<0xBA, "vfmsub231", X86any_Fmsub,
                                       X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub,
                                       X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd,
                                       X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD231   : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86any_Fnmadd,
                                       X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB231   : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86any_Fnmsub,
                                       X86Fnmsub, X86FnmsubRnd>;

// Packed FMA, 132 operand order: the register form applies the node to
// ($src1, $src3, $src2) with $src1 tied to $dst. The register form has no
// unmasked pattern (null_frag).
multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                               SDNode MaskOpNode, X86FoldableSchedWrite sched,
                               X86VectorVTInfo _> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
      Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (null_frag),
          (_.VT (MaskOpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1>,
          EVEX_4V, Sched<[sched]>;

  // Pattern is 312 order so that the load is in a different place from the
  // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
  defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)),
          (_.VT (MaskOpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>,
          EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;

  // Pattern is 312 order so that the load is in a different place from the
  // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
  defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
         (ins _.RC:$src2, _.ScalarMemOp:$src3),
         OpcodeStr, "${src3}"#_.BroadcastStr#", $src2",
         "$src2, ${src3}"#_.BroadcastStr,
         (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
                       _.RC:$src1, _.RC:$src2)),
         (_.VT (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
                           _.RC:$src1, _.RC:$src2)), 1, 0>,
         EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Packed FMA, 132 order, register form with rounding control. Only the masked
// pattern is provided (the unmasked one is null_frag).
multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched,
                                 X86VectorVTInfo _> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
      Uses = [MXCSR] in
  defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
          (null_frag),
          (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 timm:$rc))),
          1, 1>, EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
}

// Width fan-out for the 132 forms; mirrors avx512_fma3p_213_common.
multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                                   SDNode MaskOpNode, SDNode OpNodeRnd,
                                   X86SchedWriteWidths sched,
                                   AVX512VLVectorVTInfo _,
                                   Predicate prd = HasAVX512> {
  let Predicates = [prd] in {
    defm Z      : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                      sched.ZMM, _.info512>,
                  avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
                                        _.info512>,
                              EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, prd] in {
    defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.YMM, _.info256>,
                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.XMM, _.info128>,
                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}

// Element-type fan-out for the 132 forms (PH gated on HasFP16, PS, PD).
multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                              SDNode MaskOpNode, SDNode OpNodeRnd > {
    defm PH : avx512_fma3p_132_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
                                      OpNodeRnd, SchedWriteFMA,
                                      avx512vl_f16_info, HasFP16>, T_MAP6PD;
    defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
                                      OpNodeRnd, SchedWriteFMA,
                                      avx512vl_f32_info>, T8PD;
    defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
                                      OpNodeRnd, SchedWriteFMA,
                                      avx512vl_f64_info>, T8PD, VEX_W;
}

defm VFMADD132    : avx512_fma3p_132_f<0x98, "vfmadd132", any_fma,
                                       fma, X86FmaddRnd>;
defm VFMSUB132    : avx512_fma3p_132_f<0x9A, "vfmsub132", X86any_Fmsub,
                                       X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub,
                                       X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd,
                                       X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD132   : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86any_Fnmadd,
                                       X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB132   : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86any_Fnmsub,
                                       X86Fnmsub, X86FnmsubRnd>;

// Scalar FMA
// Defines six instructions for one scalar FMA opcode:
//   r_Int / m_Int / rb_Int : maskable forms on the vector register class
//                            (_.RC); they carry no pattern (null_frag).
//   r / m / rb             : isCodeGenOnly forms on the scalar register class
//                            (_.FRC), using the RHS_r / RHS_m / RHS_b dags as
//                            patterns. When MaskOnlyReg is set, the r and rb
//                            forms get an empty pattern list.
multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                               dag RHS_r, dag RHS_m, dag RHS_b, bit MaskOnlyReg> {
let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
  defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3), OpcodeStr,
          "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
          EVEX_4V, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC;

  let mayLoad = 1 in
  defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
          "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
          EVEX_4V, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC;

  let Uses = [MXCSR] in
  defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
         (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
         OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (null_frag), 1, 1>,
         EVEX_4V, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>;

  let isCodeGenOnly = 1, isCommutable = 1 in {
    def r     : AVX512<opc, MRMSrcReg, (outs _.FRC:$dst),
                     (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
                     !strconcat(OpcodeStr,
                              "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                     !if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>, EVEX_4V, SIMD_EXC;
    def m     : AVX512<opc, MRMSrcMem, (outs _.FRC:$dst),
                    (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
                    !strconcat(OpcodeStr,
                               "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                    [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>, EVEX_4V, SIMD_EXC;

    let Uses = [MXCSR] in
    def rb    : AVX512<opc, MRMSrcReg, (outs _.FRC:$dst),
                     (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3, AVX512RC:$rc),
                     !strconcat(OpcodeStr,
                              "\t{$rc, $src3, $src2, $dst|$dst, $src2, $src3, $rc}"),
                     !if(MaskOnlyReg, [], [RHS_b])>, EVEX_B, EVEX_RC,
                     Sched<[SchedWriteFMA.Scl]>, EVEX_4V;
  }// isCodeGenOnly = 1
}// Constraints = "$src1 = $dst"
}

// Instantiates avx512_fma3s_common for the 213, 231 and 132 operand orders of
// one scalar FMA operation. The 231 and 132 variants pass MaskOnlyReg = 1, so
// only the 213 variant carries patterns on its r/rb register forms.
multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
                            string OpcodeStr, SDPatternOperator OpNode, SDNode OpNodeRnd,
                            X86VectorVTInfo _, string SUFF> {
  let ExeDomain = _.ExeDomain in {
  defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix, _,
                // Operands for intrinsic are in 123 order to preserve passthru
                // semantics.
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
                         _.FRC:$src3))),
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
                         (_.ScalarLdFrag addr:$src3)))),
                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src1,
                         _.FRC:$src3, (i32 timm:$rc)))), 0>;

  defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix, _,
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
                                          _.FRC:$src1))),
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
                            (_.ScalarLdFrag addr:$src3), _.FRC:$src1))),
                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src3,
                         _.FRC:$src1, (i32 timm:$rc)))), 1>;

  // One pattern is 312 order so that the load is in a different place from the
  // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
  defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix, _,
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3,
                         _.FRC:$src2))),
                (set _.FRC:$dst, (_.EltVT (OpNode (_.ScalarLdFrag addr:$src3),
                                 _.FRC:$src1, _.FRC:$src2))),
                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src1, _.FRC:$src3,
                         _.FRC:$src2, (i32 timm:$rc)))), 1>;
  }
}

// Element-type fan-out for scalar FMA: SS (f32) and SD (f64) under HasAVX512,
// SH (f16) under HasFP16.
multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
                        string OpcodeStr, SDPatternOperator OpNode, SDNode OpNodeRnd> {
  let Predicates = [HasAVX512] in {
    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
                                 OpNodeRnd, f32x_info, "SS">,
                                 EVEX_CD8<32, CD8VT1>, VEX_LIG, T8PD;
    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
                                 OpNodeRnd, f64x_info, "SD">,
                                 EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W, T8PD;
  }
  let Predicates = [HasFP16] in {
    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
                                 OpNodeRnd, f16x_info, "SH">,
                                 EVEX_CD8<16, CD8VT1>, VEX_LIG, T_MAP6PD;
  }
}

defm VFMADD  : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", any_fma, X86FmaddRnd>;
defm VFMSUB  : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86any_Fmsub, X86FmsubRnd>;
defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86any_Fnmadd, X86FnmaddRnd>;
defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86any_Fnmsub, X86FnmsubRnd>;

// Selection patterns that map a scalar FMA whose result is written back into
// element 0 of $src1 (Move + scalar_to_vector) onto the *_Int instruction
// forms named Prefix # <order> # Suffix # "Z..." .
//   Op       - node used for the unmasked patterns.
//   MaskedOp - node used under X86selects_mask (merge- and zero-masking; the
//              zero-masking patterns select against ZeroFP).
//   RndOp    - node carrying an explicit rounding operand ($rc).
// In every pattern, element 0 of $src1 is one of the FMA operands; its
// position together with the position of the load decides which of the
// 213 / 231 / 132 instructions is selected.
multiclass avx512_scalar_fma_patterns<SDPatternOperator Op, SDNode MaskedOp,
                                      SDNode RndOp, string Prefix,
                                      string Suffix, SDNode Move,
                                      X86VectorVTInfo _, PatLeaf ZeroFP,
                                      Predicate prd = HasAVX512> {
  let Predicates = [prd] in {
    // Unmasked patterns.
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op _.FRC:$src2,
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                    _.FRC:$src3))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zr_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op _.FRC:$src2, _.FRC:$src3,
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zr_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op _.FRC:$src2,
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                    (_.ScalarLdFrag addr:$src3)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zm_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                    (_.ScalarLdFrag addr:$src3), _.FRC:$src2))))),
              (!cast<I>(Prefix#"132"#Suffix#"Zm_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zm_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               addr:$src3)>;

    // Merge-masked patterns: the unselected value is element 0 of $src1.
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
               (X86selects_mask VK1WM:$mask,
                (MaskedOp _.FRC:$src2,
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                    _.FRC:$src3),
                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zr_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
               (X86selects_mask VK1WM:$mask,
                (MaskedOp _.FRC:$src2,
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                    (_.ScalarLdFrag addr:$src3)),
                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zm_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
               (X86selects_mask VK1WM:$mask,
                (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                          (_.ScalarLdFrag addr:$src3), _.FRC:$src2),
                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"132"#Suffix#"Zm_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
               (X86selects_mask VK1WM:$mask,
                (MaskedOp _.FRC:$src2, _.FRC:$src3,
                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zr_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
               (X86selects_mask VK1WM:$mask,
                (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zm_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    // Zero-masked patterns: the unselected value is ZeroFP.
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
               (X86selects_mask VK1WM:$mask,
                (MaskedOp _.FRC:$src2,
                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                          _.FRC:$src3),
                (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zr_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
               (X86selects_mask VK1WM:$mask,
                (MaskedOp _.FRC:$src2, _.FRC:$src3,
                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
                (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zr_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
               (X86selects_mask VK1WM:$mask,
                (MaskedOp _.FRC:$src2,
                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                          (_.ScalarLdFrag addr:$src3)),
                (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zm_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    // The 132 form multiplies $src1 by the memory operand and adds $src2, so
    // the load must be the second FMA operand and $src2 the addend -- the
    // same operand order as the unmasked and merge-masked 132 load patterns.
    // (With the load as the addend this would describe the 213 computation
    // while selecting the 132 instruction.)
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
               (X86selects_mask VK1WM:$mask,
                (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                          (_.ScalarLdFrag addr:$src3), _.FRC:$src2),
                (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"132"#Suffix#"Zm_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
               (X86selects_mask VK1WM:$mask,
                (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
                (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zm_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    // Patterns with rounding mode.
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (RndOp _.FRC:$src2,
                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                       _.FRC:$src3, (i32 timm:$rc)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (RndOp _.FRC:$src2, _.FRC:$src3,
                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                       (i32 timm:$rc)))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
               (X86selects_mask VK1WM:$mask,
                (RndOp _.FRC:$src2,
                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                       _.FRC:$src3, (i32 timm:$rc)),
                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
               (X86selects_mask VK1WM:$mask,
                (RndOp _.FRC:$src2, _.FRC:$src3,
                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                       (i32 timm:$rc)),
                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
               (X86selects_mask VK1WM:$mask,
                (RndOp _.FRC:$src2,
                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                       _.FRC:$src3, (i32 timm:$rc)),
                (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
               (X86selects_mask VK1WM:$mask,
                (RndOp _.FRC:$src2, _.FRC:$src3,
                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                       (i32 timm:$rc)),
                (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
  }
}
defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD", "SH",
                                  X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB", "SH",
                                  X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SH",
                                  X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SH",
                                  X86Movsh, v8f16x_info, fp16imm0, HasFP16>;

defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD",
                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
                                  "SS", X86Movss, v4f32x_info, fp32imm0>;

defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD",
                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;

//===----------------------------------------------------------------------===//
// AVX-512  Packed Multiply of Unsigned 52-bit Integers and Add the Low 52-bit IFMA
//===----------------------------------------------------------------------===//
// Register, full-vector load and embedded-broadcast load forms of one IFMA
// instruction; $src1 is tied to $dst and is passed as the last node operand.
let Constraints = "$src1 = $dst" in {
multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  // NOTE: The SDNode have the multiply operands first with the add last.
  // This enables commuted load patterns to be autogenerated by tablegen.
  let ExeDomain = _.ExeDomain in {
  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
          T8PD, EVEX_4V, Sched<[sched]>;

  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
          T8PD, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
            (ins _.RC:$src2, _.ScalarMemOp:$src3),
            OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
            !strconcat("$src2, ${src3}", _.BroadcastStr ),
            (OpNode _.RC:$src2,
                    (_.VT (_.BroadcastLdFrag addr:$src3)),
                    _.RC:$src1)>,
            T8PD, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
} // Constraints = "$src1 = $dst"

// Width fan-out for IFMA: Z requires HasIFMA; Z256/Z128 additionally require
// HasVLX.
multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
  let Predicates = [HasIFMA] in {
    defm Z      : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
                      EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, HasIFMA] in {
    defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}

defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l,
                                SchedWriteVecIMul, avx512vl_i64_info>,
                                VEX_W;
defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h,
                                SchedWriteVecIMul, avx512vl_i64_info>,
                                VEX_W;

//===----------------------------------------------------------------------===//
// AVX-512  Scalar convert from signed/unsigned integer to float/double
//===----------------------------------------------------------------------===//

// Scalar integer -> FP conversion records that take no rounding operand.
//   rr / rm         : isCodeGenOnly forms on DstVT.FRC; they carry no pattern
//                     of their own.
//   rr_Int / rm_Int : forms on DstVT.RC, selected through OpNode.
// `mem` is the size suffix: the memory forms print it as "{mem}" in their asm
// string, and the trailing AT&T-only InstAlias accepts it on the register form.
// _Uses / _mayRaiseFPException let an instantiation override the default MXCSR
// use and FP-exception flag.
multiclass avx512_vcvtsi<bits<8> opc, SDPatternOperator OpNode,
                         X86FoldableSchedWrite sched, RegisterClass SrcRC,
                         X86VectorVTInfo DstVT, X86MemOperand x86memop,
                         PatFrag ld_frag, string asm, string mem,
                         list<Register> _Uses = [MXCSR],
                         bit _mayRaiseFPException = 1> {
  let ExeDomain = DstVT.ExeDomain, Uses = _Uses,
      mayRaiseFPException = _mayRaiseFPException in {
    let hasSideEffects = 0, isCodeGenOnly = 1 in {
      def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
                  (ins DstVT.FRC:$src1, SrcRC:$src),
                  asm # "\t{$src, $src1, $dst|$dst, $src1, $src}", []>,
               EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
      let mayLoad = 1 in
      def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
                  (ins DstVT.FRC:$src1, x86memop:$src),
                  !strconcat(asm, "{", mem,
                             "}\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
               EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
    } // hasSideEffects = 0, isCodeGenOnly = 1

    def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
                    (ins DstVT.RC:$src1, SrcRC:$src2),
                    asm # "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set DstVT.RC:$dst,
                          (OpNode (DstVT.VT DstVT.RC:$src1), SrcRC:$src2))]>,
                 EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;

    def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
                    (ins DstVT.RC:$src1, x86memop:$src2),
                    !strconcat(asm, "{", mem,
                               "}\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set DstVT.RC:$dst,
                          (OpNode (DstVT.VT DstVT.RC:$src1),
                                  (ld_frag addr:$src2)))]>,
                 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  } // ExeDomain, Uses, mayRaiseFPException

  // AT&T spelling of the register form with the explicit size suffix.
  def : InstAlias<!strconcat("v", asm, mem,
                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  (!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst,
                   DstVT.RC:$src1, SrcRC:$src2), 0, "att">;
}

// Register form of the scalar integer -> FP conversion with an explicit
// rounding-control operand ($rc, EVEX_B + EVEX_RC), plus its suffixed AT&T
// alias.
multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode,
                               X86FoldableSchedWrite sched,
                               RegisterClass SrcRC, X86VectorVTInfo DstVT,
                               string asm, string mem> {
  let ExeDomain = DstVT.ExeDomain, Uses = [MXCSR] in
  def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
                   (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
                   asm # "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}",
                   [(set DstVT.RC:$dst,
                         (OpNode (DstVT.VT DstVT.RC:$src1),
                                 SrcRC:$src2,
                                 (i32 timm:$rc)))]>,
                EVEX_4V, EVEX_B, EVEX_RC,
                Sched<[sched, ReadDefault, ReadInt2Fpu]>;

  def : InstAlias<!strconcat("v", asm, mem,
                      "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"),
                  (!cast<Instruction>(NAME # "rrb_Int") DstVT.RC:$dst,
                   DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc), 0, "att">;
}

// Both flavours under one name: the rounding-control form (OpNodeRnd) and the
// plain reg/mem forms (OpNode), all marked VEX_LIG.
multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode, SDNode OpNodeRnd,
                                X86FoldableSchedWrite sched,
                                RegisterClass SrcRC, X86VectorVTInfo DstVT,
                                X86MemOperand x86memop, PatFrag ld_frag,
                                string asm, string mem> {
  defm NAME : avx512_vcvtsi_round<opc, OpNodeRnd, sched, SrcRC, DstVT, asm,
                                  mem>,
              avx512_vcvtsi<opc, OpNode, sched, SrcRC, DstVT, x86memop,
                            ld_frag, asm, mem>,
              VEX_LIG;
}

let Predicates = [HasAVX512] in {
// Signed integer sources.
defm VCVTSI2SSZ :
    avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd, WriteCvtI2SS, GR32,
                         v4f32x_info, i32mem, loadi32, "cvtsi2ss", "l">,
    XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSI642SSZ :
    avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd, WriteCvtI2SS, GR64,
                         v4f32x_info, i64mem, loadi64, "cvtsi2ss", "q">,
    XS, VEX_W, EVEX_CD8<64, CD8VT1>;
// The 32-bit -> f64 variant is instantiated without a rounding form, with no
// OpNode pattern (null_frag), an empty Uses list and mayRaiseFPException = 0.
defm VCVTSI2SDZ :
    avx512_vcvtsi<0x2A, null_frag, WriteCvtI2SD, GR32,
                  v2f64x_info, i32mem, loadi32, "cvtsi2sd", "l", [], 0>,
    XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm VCVTSI642SDZ :
    avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd, WriteCvtI2SD, GR64,
                         v2f64x_info, i64mem, loadi64, "cvtsi2sd", "q">,
    XD, VEX_W, EVEX_CD8<64, CD8VT1>;

// Suffix-less AT&T memory spellings map to the 32-bit memory forms.
def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
                (VCVTSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src),
                0, "att">;
def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
                (VCVTSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src),
                0, "att">;

// Scalar selection: the pass-through operand is left undefined.
def : Pat<(f32 (any_sint_to_fp (loadi32 addr:$src))),
          (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f32 (any_sint_to_fp (loadi64 addr:$src))),
          (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (any_sint_to_fp (loadi32 addr:$src))),
          (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (any_sint_to_fp (loadi64 addr:$src))),
          (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;

def : Pat<(f32 (any_sint_to_fp GR32:$src)),
          (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f32 (any_sint_to_fp GR64:$src)),
          (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
def : Pat<(f64 (any_sint_to_fp GR32:$src)),
          (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f64 (any_sint_to_fp GR64:$src)),
          (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;

// Unsigned integer sources; same layout as the signed group above.
defm VCVTUSI2SSZ :
    avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd, WriteCvtI2SS, GR32,
                         v4f32x_info, i32mem, loadi32, "cvtusi2ss", "l">,
    XS, EVEX_CD8<32, CD8VT1>;
defm VCVTUSI642SSZ :
    avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd, WriteCvtI2SS, GR64,
                         v4f32x_info, i64mem, loadi64, "cvtusi2ss", "q">,
    XS, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTUSI2SDZ :
    avx512_vcvtsi<0x7B, null_frag, WriteCvtI2SD, GR32, v2f64x_info,
                  i32mem, loadi32, "cvtusi2sd", "l", [], 0>,
    XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm VCVTUSI642SDZ :
    avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd, WriteCvtI2SD, GR64,
                         v2f64x_info, i64mem, loadi64, "cvtusi2sd", "q">,
    XD, VEX_W, EVEX_CD8<64, CD8VT1>;

def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
                (VCVTUSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src),
                0, "att">;
def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
                (VCVTUSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src),
                0, "att">;

def : Pat<(f32 (any_uint_to_fp (loadi32 addr:$src))),
          (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f32 (any_uint_to_fp (loadi64 addr:$src))),
          (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (any_uint_to_fp (loadi32 addr:$src))),
          (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (any_uint_to_fp (loadi64 addr:$src))),
          (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;

def : Pat<(f32 (any_uint_to_fp GR32:$src)),
          (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f32 (any_uint_to_fp GR64:$src)),
          (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
def : Pat<(f64 (any_uint_to_fp GR32:$src)),
          (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f64 (any_uint_to_fp GR64:$src)),
          (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
} // Predicates = [HasAVX512]

//===----------------------------------------------------------------------===//
// AVX-512  Scalar convert from float/double to integer
//===----------------------------------------------------------------------===//

// Vector-register-source (_Int) scalar FP -> integer conversions:
//   rr_Int  : register source, selected through OpNode.
//   rrb_Int : register source plus rounding-control operand, selected through
//             OpNodeRnd.
//   rm_Int  : scalar memory source (SrcVT.IntScalarMemOp), selected through
//             OpNode.
// The three AT&T aliases accept the mnemonic with aliasStr appended.
multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
                                  X86VectorVTInfo DstVT, SDNode OpNode,
                                  SDNode OpNodeRnd,
                                  X86FoldableSchedWrite sched, string asm,
                                  string aliasStr,
                                  Predicate prd = HasAVX512> {
  let Predicates = [prd], ExeDomain = SrcVT.ExeDomain in {
    def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
                    (ins SrcVT.RC:$src),
                    asm # "\t{$src, $dst|$dst, $src}",
                    [(set DstVT.RC:$dst,
                          (OpNode (SrcVT.VT SrcVT.RC:$src)))]>,
                 EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
    let Uses = [MXCSR] in
    def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
                     (ins SrcVT.RC:$src, AVX512RC:$rc),
                     asm # "\t{$rc, $src, $dst|$dst, $src, $rc}",
                     [(set DstVT.RC:$dst,
                           (OpNodeRnd (SrcVT.VT SrcVT.RC:$src),
                                      (i32 timm:$rc)))]>,
                  EVEX, VEX_LIG, EVEX_B, EVEX_RC, Sched<[sched]>;
    def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
                    (ins SrcVT.IntScalarMemOp:$src),
                    asm # "\t{$src, $dst|$dst, $src}",
                    [(set DstVT.RC:$dst,
                          (OpNode (SrcVT.ScalarIntMemFrags addr:$src)))]>,
                 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>,
                 SIMD_EXC;
  } // Predicates = [prd]

  def : InstAlias<!strconcat("v", asm, aliasStr, "\t{$src, $dst|$dst, $src}"),
                  (!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst,
                   SrcVT.RC:$src), 0, "att">;
  def : InstAlias<!strconcat("v", asm, aliasStr,
                             "\t{$rc, $src, $dst|$dst, $src, $rc}"),
                  (!cast<Instruction>(NAME # "rrb_Int") DstVT.RC:$dst,
                   SrcVT.RC:$src, AVX512RC:$rc), 0, "att">;
  def : InstAlias<!strconcat("v", asm, aliasStr, "\t{$src, $dst|$dst, $src}"),
                  (!cast<Instruction>(NAME # "rm_Int") DstVT.RC:$dst,
                   SrcVT.IntScalarMemOp:$src), 0, "att">;
}

// Convert float/double to signed/unsigned int 32/64
defm VCVTSS2SIZ :
    avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info, X86cvts2si,
                           X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{l}">,
    XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2SI64Z :
    avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info, X86cvts2si,
                           X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{q}">,
    XS, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2USIZ :
    avx512_cvt_s_int_round<0x79, f32x_info, i32x_info, X86cvts2usi,
                           X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{l}">,
    XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2USI64Z :
    avx512_cvt_s_int_round<0x79, f32x_info, i64x_info, X86cvts2usi,
                           X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{q}">,
    XS, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTSD2SIZ :
    avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info, X86cvts2si,
                           X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{l}">,
    XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2SI64Z :
    avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info, X86cvts2si,
                           X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{q}">,
    XD, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2USIZ :
    avx512_cvt_s_int_round<0x79, f64x_info, i32x_info, X86cvts2usi,
                           X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{l}">,
    XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2USI64Z :
    avx512_cvt_s_int_round<0x79, f64x_info, i64x_info, X86cvts2usi,
                           X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{q}">,
    XD, VEX_W, EVEX_CD8<64, CD8VT1>;

// Codegen-only FRC-source forms (rr / rm) of a scalar FP -> integer
// conversion, selected through OpNode on a scalar register or a scalar load.
multiclass avx512_cvt_s<bits<8> opc, string asm, X86VectorVTInfo SrcVT,
                        X86VectorVTInfo DstVT, SDNode OpNode,
                        X86FoldableSchedWrite sched> {
  let Predicates = [HasAVX512], ExeDomain = SrcVT.ExeDomain,
      isCodeGenOnly = 1 in {
    def rr : AVX512<opc, MRMSrcReg, (outs DstVT.RC:$dst),
                    (ins SrcVT.FRC:$src),
                    asm # "\t{$src, $dst|$dst, $src}",
                    [(set DstVT.RC:$dst, (OpNode SrcVT.FRC:$src))]>,
             EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
    def rm : AVX512<opc, MRMSrcMem, (outs DstVT.RC:$dst),
                    (ins SrcVT.ScalarMemOp:$src),
                    asm # "\t{$src, $dst|$dst, $src}",
                    [(set DstVT.RC:$dst,
                          (OpNode (SrcVT.ScalarLdFrag addr:$src)))]>,
             EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>,
             SIMD_EXC;
  } // Predicates = [HasAVX512], isCodeGenOnly = 1
}

// lrint selects the 32-bit results, llrint the 64-bit ones.
defm VCVTSS2SIZ :
    avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i32x_info, lrint,
                 WriteCvtSS2I>,
    XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2SI64Z :
    avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i64x_info, llrint,
                 WriteCvtSS2I>,
    VEX_W, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSD2SIZ :
    avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i32x_info, lrint,
                 WriteCvtSD2I>,
    XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2SI64Z :
    avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i64x_info, llrint,
                 WriteCvtSD2I>,
    VEX_W, XD, EVEX_CD8<64, CD8VT1>;

// lrint producing an i64 result is selected to the 64-bit conversion forms.
let Predicates = [HasAVX512] in {
  def : Pat<(i64 (lrint FR32:$src)),
            (VCVTSS2SI64Zrr FR32:$src)>;
  def : Pat<(i64 (lrint (loadf32 addr:$src))),
            (VCVTSS2SI64Zrm addr:$src)>;

  def : Pat<(i64 (lrint FR64:$src)),
            (VCVTSD2SI64Zrr FR64:$src)>;
  def : Pat<(i64 (lrint (loadf64 addr:$src))),
            (VCVTSD2SI64Zrm addr:$src)>;
}

// Patterns matching the vcvt[u]si2s{s,d} intrinsic sequences emitted by clang,
// which would otherwise produce unnecessary vmovs{s,d} instructions: the
// scalar convert + insert-into-low-element is folded into the _Int form.
let Predicates = [HasAVX512] in {
// Signed sources.
def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector
                            (f32 (any_sint_to_fp GR64:$src)))))),
          (VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector
                            (f32 (any_sint_to_fp (loadi64 addr:$src))))))),
          (VCVTSI642SSZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector
                            (f32 (any_sint_to_fp GR32:$src)))))),
          (VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector
                            (f32 (any_sint_to_fp (loadi32 addr:$src))))))),
          (VCVTSI2SSZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector
                            (f64 (any_sint_to_fp GR64:$src)))))),
          (VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector
                            (f64 (any_sint_to_fp (loadi64 addr:$src))))))),
          (VCVTSI642SDZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector
                            (f64 (any_sint_to_fp GR32:$src)))))),
          (VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector
                            (f64 (any_sint_to_fp (loadi32 addr:$src))))))),
          (VCVTSI2SDZrm_Int VR128X:$dst, addr:$src)>;

// Unsigned sources.
def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector
                            (f32 (any_uint_to_fp GR64:$src)))))),
          (VCVTUSI642SSZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector
                            (f32 (any_uint_to_fp (loadi64 addr:$src))))))),
          (VCVTUSI642SSZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector
                            (f32 (any_uint_to_fp GR32:$src)))))),
          (VCVTUSI2SSZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector
                            (f32 (any_uint_to_fp (loadi32 addr:$src))))))),
          (VCVTUSI2SSZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector
                            (f64 (any_uint_to_fp GR64:$src)))))),
          (VCVTUSI642SDZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector
                            (f64 (any_uint_to_fp (loadi64 addr:$src))))))),
          (VCVTUSI642SDZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector
                            (f64 (any_uint_to_fp GR32:$src)))))),
          (VCVTUSI2SDZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector
                            (f64 (any_uint_to_fp (loadi32 addr:$src))))))),
          (VCVTUSI2SDZrm_Int VR128X:$dst, addr:$src)>;
} // Predicates = [HasAVX512]

// Convert float/double to signed/unsigned int 32/64 with truncation.
//   rr / rm         : codegen-only FRC-source forms, selected through OpNode.
//   rr_Int / rm_Int : vector-register / scalar-memory forms, selected through
//                     OpNodeInt.
//   rrb_Int         : {sae} register form, selected through OpNodeSAE.
// The AT&T aliases accept the mnemonic with aliasStr appended.
multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
                            X86VectorVTInfo _DstRC, SDPatternOperator OpNode,
                            SDNode OpNodeInt, SDNode OpNodeSAE,
                            X86FoldableSchedWrite sched, string aliasStr,
                            Predicate prd = HasAVX512> {
  let Predicates = [prd], ExeDomain = _SrcRC.ExeDomain in {
    let isCodeGenOnly = 1 in {
      def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst),
                      (ins _SrcRC.FRC:$src),
                      asm # "\t{$src, $dst|$dst, $src}",
                      [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>,
               EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
      def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
                      (ins _SrcRC.ScalarMemOp:$src),
                      asm # "\t{$src, $dst|$dst, $src}",
                      [(set _DstRC.RC:$dst,
                            (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>,
               EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>,
               SIMD_EXC;
    } // isCodeGenOnly = 1

    def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst),
                        (ins _SrcRC.RC:$src),
                        asm # "\t{$src, $dst|$dst, $src}",
                        [(set _DstRC.RC:$dst,
                              (OpNodeInt (_SrcRC.VT _SrcRC.RC:$src)))]>,
                 EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
    let Uses = [MXCSR] in
    def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst),
                         (ins _SrcRC.RC:$src),
                         asm # "\t{{sae}, $src, $dst|$dst, $src, {sae}}",
                         [(set _DstRC.RC:$dst,
                               (OpNodeSAE (_SrcRC.VT _SrcRC.RC:$src)))]>,
                  EVEX, VEX_LIG, EVEX_B, Sched<[sched]>;
    def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
                        (ins _SrcRC.IntScalarMemOp:$src),
                        asm # "\t{$src, $dst|$dst, $src}",
                        [(set _DstRC.RC:$dst,
                              (OpNodeInt
                                (_SrcRC.ScalarIntMemFrags addr:$src)))]>,
                 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>,
                 SIMD_EXC;
  } // Predicates = [prd]

  def : InstAlias<!strconcat(asm, aliasStr, "\t{$src, $dst|$dst, $src}"),
                  (!cast<Instruction>(NAME # "rr_Int") _DstRC.RC:$dst,
                   _SrcRC.RC:$src), 0, "att">;
  def : InstAlias<!strconcat(asm, aliasStr,
                             "\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
                  (!cast<Instruction>(NAME # "rrb_Int") _DstRC.RC:$dst,
                   _SrcRC.RC:$src), 0, "att">;
  def : InstAlias<!strconcat(asm, aliasStr, "\t{$src, $dst|$dst, $src}"),
                  (!cast<Instruction>(NAME # "rm_Int") _DstRC.RC:$dst,
                   _SrcRC.IntScalarMemOp:$src), 0, "att">;
}

defm VCVTTSS2SIZ :
    avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
                     any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE,
                     WriteCvtSS2I, "{l}">,
    XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2SI64Z :
    avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
                     any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE,
                     WriteCvtSS2I, "{q}">,
    VEX_W, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSD2SIZ :
    avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
                     any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE,
                     WriteCvtSD2I, "{l}">,
    XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2SI64Z :
    avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
                     any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE,
                     WriteCvtSD2I, "{q}">,
    VEX_W, XD, EVEX_CD8<64, CD8VT1>;

defm VCVTTSS2USIZ :
    avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i32x_info,
                     any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE,
                     WriteCvtSS2I, "{l}">,
    XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2USI64Z :
    avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i64x_info,
                     any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE,
                     WriteCvtSS2I, "{q}">,
    XS, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTTSD2USIZ :
    avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i32x_info,
                     any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE,
                     WriteCvtSD2I, "{l}">,
    XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2USI64Z :
    avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i64x_info,
                     any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE,
                     WriteCvtSD2I, "{q}">,
    XD, VEX_W, EVEX_CD8<64, CD8VT1>;

//===----------------------------------------------------------------------===//
// AVX-512  Convert from float to double and back
//===----------------------------------------------------------------------===//

// Scalar FP -> FP conversion: maskable rr_Int / rm_Int forms selected through
// OpNode, plus pattern-less codegen-only rr / rm forms on the FRC classes.
let Uses = [MXCSR], mayRaiseFPException = 1 in
multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr,
                                X86VectorVTInfo _, X86VectorVTInfo _Src,
                                SDNode OpNode, X86FoldableSchedWrite sched> {
  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
                    "$src2, $src1", "$src1, $src2",
                    (_.VT (OpNode (_.VT _.RC:$src1),
                                  (_Src.VT _Src.RC:$src2)))>,
                EVEX_4V, VEX_LIG, Sched<[sched]>;
  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr,
                    "$src2, $src1", "$src1, $src2",
                    (_.VT (OpNode (_.VT _.RC:$src1),
                                  (_Src.ScalarIntMemFrags addr:$src2)))>,
                EVEX_4V, VEX_LIG,
                Sched<[sched.Folded, sched.ReadAfterFold]>;

  let isCodeGenOnly = 1, hasSideEffects = 0 in {
    def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
               (ins _.FRC:$src1, _Src.FRC:$src2),
               !strconcat(OpcodeStr,
                          "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
             EVEX_4V, VEX_LIG, Sched<[sched]>;
    let mayLoad = 1 in
    def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
               (ins _.FRC:$src1, _Src.ScalarMemOp:$src2),
               !strconcat(OpcodeStr,
                          "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
             EVEX_4V, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Scalar Conversion with SAE - suppress all exceptions
multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr,
                                    X86VectorVTInfo _, X86VectorVTInfo _Src,
                                    SDNode OpNodeSAE,
                                    X86FoldableSchedWrite sched> {
  let Uses = [MXCSR] in
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                     (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
                     "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                     (_.VT (OpNodeSAE (_.VT _.RC:$src1),
                                      (_Src.VT _Src.RC:$src2)))>,
                 EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
}

// Scalar Conversion with rounding control (RC)
multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr,
                                   X86VectorVTInfo _, X86VectorVTInfo _Src,
                                   SDNode OpNodeRnd,
                                   X86FoldableSchedWrite sched> {
  let Uses = [MXCSR] in
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                     (ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
                     "$rc, $src2, $src1", "$src1, $src2, $rc",
                     (_.VT (OpNodeRnd (_.VT _.RC:$src1),
                                      (_Src.VT _Src.RC:$src2),
                                      (i32 timm:$rc)))>,
                 EVEX_4V, VEX_LIG, Sched<[sched]>,
                 EVEX_B, EVEX_RC;
}

// Narrowing scalar conversion: base forms plus the rounding-control form.
multiclass avx512_cvt_fp_scalar_trunc<bits<8> opc, string OpcodeStr,
                                      SDNode OpNode, SDNode OpNodeRnd,
                                      X86FoldableSchedWrite sched,
                                      X86VectorVTInfo _src,
                                      X86VectorVTInfo _dst,
                                      Predicate prd = HasAVX512> {
  let Predicates = [prd], ExeDomain = SSEPackedSingle in {
    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
             avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
                                     OpNodeRnd, sched>,
             EVEX_CD8<_src.EltSize, CD8VT1>;
  }
}

// Widening scalar conversion: base forms plus the {sae} form.
multiclass avx512_cvt_fp_scalar_extend<bits<8> opc, string OpcodeStr,
                                       SDNode OpNode, SDNode OpNodeSAE,
                                       X86FoldableSchedWrite sched,
                                       X86VectorVTInfo _src,
                                       X86VectorVTInfo _dst,
                                       Predicate prd = HasAVX512> {
  let Predicates = [prd], ExeDomain = SSEPackedSingle in {
    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
             avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeSAE,
                                      sched>,
             EVEX_CD8<_src.EltSize, CD8VT1>;
  }
}

defm VCVTSD2SS :
    avx512_cvt_fp_scalar_trunc<0x5A, "vcvtsd2ss", X86frounds, X86froundsRnd,
                               WriteCvtSD2SS, f64x_info, f32x_info>,
    XD, VEX_W;
defm VCVTSS2SD :
    avx512_cvt_fp_scalar_extend<0x5A, "vcvtss2sd", X86fpexts, X86fpextsSAE,
                                WriteCvtSS2SD, f32x_info, f64x_info>,
    XS;
defm VCVTSD2SH :
    avx512_cvt_fp_scalar_trunc<0x5A, "vcvtsd2sh", X86frounds, X86froundsRnd,
                               WriteCvtSD2SS, f64x_info, f16x_info, HasFP16>,
    T_MAP5XD, VEX_W;
defm VCVTSH2SD :
    avx512_cvt_fp_scalar_extend<0x5A, "vcvtsh2sd", X86fpexts, X86fpextsSAE,
                                WriteCvtSS2SD, f16x_info, f64x_info, HasFP16>,
    T_MAP5XS;
defm VCVTSS2SH :
    avx512_cvt_fp_scalar_trunc<0x1D, "vcvtss2sh", X86frounds, X86froundsRnd,
                               WriteCvtSD2SS, f32x_info, f16x_info, HasFP16>,
    T_MAP5PS;
defm VCVTSH2SS :
    avx512_cvt_fp_scalar_extend<0x13, "vcvtsh2ss", X86fpexts, X86fpextsSAE,
                                WriteCvtSS2SD, f16x_info, f32x_info, HasFP16>,
    T_MAP6PS;

// Scalar fpextend / fpround selection. The load-folding forms are only used
// under OptForSize.
def : Pat<(f64 (any_fpextend FR32X:$src)),
          (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>,
      Requires<[HasAVX512]>;
def : Pat<(f64 (any_fpextend (loadf32 addr:$src))),
          (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
      Requires<[HasAVX512, OptForSize]>;

def : Pat<(f32 (any_fpround FR64X:$src)),
          (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
      Requires<[HasAVX512]>;

def : Pat<(f32 (any_fpextend FR16X:$src)),
          (VCVTSH2SSZrr (f32 (IMPLICIT_DEF)), FR16X:$src)>,
      Requires<[HasFP16]>;
def : Pat<(f32 (any_fpextend (loadf16 addr:$src))),
          (VCVTSH2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>,
      Requires<[HasFP16, OptForSize]>;

def : Pat<(f64 (any_fpextend FR16X:$src)),
          (VCVTSH2SDZrr (f64 (IMPLICIT_DEF)), FR16X:$src)>,
      Requires<[HasFP16]>;
def : Pat<(f64 (any_fpextend (loadf16 addr:$src))),
          (VCVTSH2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
      Requires<[HasFP16, OptForSize]>;

def : Pat<(f16 (any_fpround FR32X:$src)),
          (VCVTSS2SHZrr (f16 (IMPLICIT_DEF)), FR32X:$src)>,
      Requires<[HasFP16]>;
def : Pat<(f16 (any_fpround FR64X:$src)),
          (VCVTSD2SHZrr (f16 (IMPLICIT_DEF)), FR64X:$src)>,
      Requires<[HasFP16]>;

// Convert element 0 of $src and insert it into the low element of $dst.
def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector
                            (f32 (any_fpround
                                   (f64 (extractelt VR128X:$src,
                                                    (iPTR 0))))))))),
          (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
      Requires<[HasAVX512]>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector
                            (f64 (any_fpextend
                                   (f32 (extractelt VR128X:$src,
                                                    (iPTR 0))))))))),
          (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
      Requires<[HasAVX512]>;

//===----------------------------------------------------------------------===//
// AVX-512  Vector convert from signed/unsigned integer to float/double
//          and from float/double to signed/unsigned integer
//===----------------------------------------------------------------------===//

// Packed conversion with register (rr), full-vector memory (rm) and
// embedded-broadcast memory (rmb) forms. Each form passes AVX512_maskable_cvt
// an unmasked pattern built from OpNode and merge- / zero-masking patterns
// built from MaskOpNode. Broadcast, Alias, MemOp, MaskRC, LdDAG and MaskLdDAG
// let an instantiation override the defaults derived from `_` and `_Src`.
multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                          X86VectorVTInfo _Src, SDPatternOperator OpNode,
                          SDPatternOperator MaskOpNode,
                          X86FoldableSchedWrite sched,
                          string Broadcast = _.BroadcastStr,
                          string Alias = "",
                          X86MemOperand MemOp = _Src.MemOp,
                          RegisterClass MaskRC = _.KRCWM,
                          dag LdDAG = (_.VT (OpNode (_Src.VT (_Src.LdFrag addr:$src)))),
                          dag MaskLdDAG = (_.VT (MaskOpNode (_Src.VT (_Src.LdFrag addr:$src))))> {
  let Uses = [MXCSR], mayRaiseFPException = 1 in {
    defm rr : AVX512_maskable_cvt<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _Src.RC:$src),
                  (ins _.RC:$src0, MaskRC:$mask, _Src.RC:$src),
                  (ins MaskRC:$mask, _Src.RC:$src),
                  OpcodeStr, "$src", "$src",
                  (_.VT (OpNode (_Src.VT _Src.RC:$src))),
                  (vselect_mask MaskRC:$mask,
                                (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))),
                                _.RC:$src0),
                  (vselect_mask MaskRC:$mask,
                                (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))),
                                _.ImmAllZerosV)>,
              EVEX, Sched<[sched]>;

    defm rm : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins MemOp:$src),
                  (ins _.RC:$src0, MaskRC:$mask, MemOp:$src),
                  (ins MaskRC:$mask, MemOp:$src),
                  !strconcat(OpcodeStr, Alias), "$src", "$src",
                  LdDAG,
                  (vselect_mask MaskRC:$mask, MaskLdDAG, _.RC:$src0),
                  (vselect_mask MaskRC:$mask, MaskLdDAG, _.ImmAllZerosV)>,
              EVEX, Sched<[sched.Folded]>;

    defm rmb : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _Src.ScalarMemOp:$src),
                   (ins _.RC:$src0, MaskRC:$mask, _Src.ScalarMemOp:$src),
                   (ins MaskRC:$mask, _Src.ScalarMemOp:$src),
                   OpcodeStr,
                   !strconcat("${src}", Broadcast),
                   !strconcat("${src}", Broadcast),
                   (_.VT (OpNode (_Src.VT
                                   (_Src.BroadcastLdFrag addr:$src)))),
                   (vselect_mask MaskRC:$mask,
                                 (_.VT
                                  (MaskOpNode
                                   (_Src.VT
                                    (_Src.BroadcastLdFrag addr:$src)))),
                                 _.RC:$src0),
                   (vselect_mask MaskRC:$mask,
                                 (_.VT
                                  (MaskOpNode
                                   (_Src.VT
                                    (_Src.BroadcastLdFrag addr:$src)))),
                                 _.ImmAllZerosV)>,
               EVEX, EVEX_B, Sched<[sched.Folded]>;
  }
}

// Conversion with SAE - suppress all exceptions
multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr,
                              X86VectorVTInfo _, X86VectorVTInfo _Src,
                              SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
  let Uses = [MXCSR] in
  defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                 (ins _Src.RC:$src), OpcodeStr,
                 "{sae}, $src", "$src, {sae}",
                 (_.VT (OpNodeSAE (_Src.VT _Src.RC:$src)))>,
             EVEX, EVEX_B, Sched<[sched]>;
}

// Conversion with rounding control (RC)
multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr,
                             X86VectorVTInfo _, X86VectorVTInfo _Src,
                             SDPatternOperator OpNodeRnd,
                             X86FoldableSchedWrite sched> {
  let Uses = [MXCSR] in
  defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                 (ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr,
                 "$rc, $src", "$src, $rc",
                 (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 timm:$rc)))>,
             EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
}

// Similar to avx512_vcvt_fp, but uses an extload for the memory form.
// Wrapper around avx512_vcvt_fp for FP-extend conversions: forwards all of its
// arguments and supplies an "extload"#_Src.VTName load of $src as the two
// trailing pattern arguments.
multiclass avx512_vcvt_fpextend<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                X86VectorVTInfo _Src, SDPatternOperator OpNode,
                                SDNode MaskOpNode,
                                X86FoldableSchedWrite sched,
                                string Broadcast = _.BroadcastStr,
                                string Alias = "", X86MemOperand MemOp = _Src.MemOp,
                                RegisterClass MaskRC = _.KRCWM>
  : avx512_vcvt_fp<opc, OpcodeStr, _, _Src, OpNode, MaskOpNode, sched, Broadcast,
                   Alias, MemOp, MaskRC,
                   (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src)),
                   (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src))>;

// Extend [Float to Double, Half to Float]
// The 512-bit form (Z) is guarded by 'prd' alone; the 128/256-bit forms also
// require HasVLX.
multiclass avx512_cvt_extend<bits<8> opc, string OpcodeStr,
                             AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
                             X86SchedWriteWidths sched, Predicate prd = HasAVX512> {
  let Predicates = [prd] in {
    defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info512, _src.info256,
                                  any_fpextend, fpextend, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, _dst.info512, _src.info256,
                                X86vfpextSAE, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info128, _src.info128,
                                     X86any_vfpext, X86vfpext, sched.XMM,
                                     _dst.info128.BroadcastStr,
                                     "", f64mem>, EVEX_V128;
    defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info256, _src.info128,
                                     any_fpextend, fpextend, sched.YMM>, EVEX_V256;
  }
}

// Truncate [Double to Float, Float to Half]
// bcast128 / loadVT128 / maskRC128 parameterize the hand-written Z128 patterns
// below; they default to the corresponding _src.info128 fields.
multiclass avx512_cvt_trunc<bits<8> opc, string OpcodeStr,
                            AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
                            X86SchedWriteWidths sched, Predicate prd = HasAVX512,
                            PatFrag bcast128 = _src.info128.BroadcastLdFrag,
                            PatFrag loadVT128 = _src.info128.LdFrag,
                            RegisterClass maskRC128 = _src.info128.KRCWM> {
  let Predicates = [prd] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, _dst.info256, _src.info512,
                            X86any_vfpround, X86vfpround, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, _dst.info256, _src.info512,
                               X86vfproundRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info128,
                               null_frag, null_frag, sched.XMM,
                               _src.info128.BroadcastStr, "{x}",
                               f128mem, maskRC128>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info256,
                               X86any_vfpround, X86vfpround,
                               sched.YMM, _src.info256.BroadcastStr, "{y}">, EVEX_V256;

    // Special patterns to allow use of X86vmfpround for masking. Instruction
    // patterns have been disabled with null_frag.
    def : Pat<(_dst.info128.VT (X86any_vfpround (_src.info128.VT VR128X:$src))),
              (!cast<Instruction>(NAME # "Z128rr") VR128X:$src)>;
    def : Pat<(X86vmfpround (_src.info128.VT VR128X:$src), (_dst.info128.VT VR128X:$src0),
                            maskRC128:$mask),
              (!cast<Instruction>(NAME # "Z128rrk") VR128X:$src0, maskRC128:$mask, VR128X:$src)>;
    def : Pat<(X86vmfpround (_src.info128.VT VR128X:$src), _dst.info128.ImmAllZerosV,
                            maskRC128:$mask),
              (!cast<Instruction>(NAME # "Z128rrkz") maskRC128:$mask, VR128X:$src)>;

    def : Pat<(_dst.info128.VT (X86any_vfpround (loadVT128 addr:$src))),
              (!cast<Instruction>(NAME # "Z128rm") addr:$src)>;
    def : Pat<(X86vmfpround (loadVT128 addr:$src), (_dst.info128.VT VR128X:$src0),
                            maskRC128:$mask),
              (!cast<Instruction>(NAME # "Z128rmk") VR128X:$src0, maskRC128:$mask, addr:$src)>;
    def : Pat<(X86vmfpround (loadVT128 addr:$src), _dst.info128.ImmAllZerosV,
                            maskRC128:$mask),
              (!cast<Instruction>(NAME # "Z128rmkz") maskRC128:$mask, addr:$src)>;

    def : Pat<(_dst.info128.VT (X86any_vfpround (_src.info128.VT (bcast128 addr:$src)))),
              (!cast<Instruction>(NAME # "Z128rmb") addr:$src)>;
    def : Pat<(X86vmfpround (_src.info128.VT (bcast128 addr:$src)),
                            (_dst.info128.VT VR128X:$src0), maskRC128:$mask),
              (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$src0, maskRC128:$mask, addr:$src)>;
    def : Pat<(X86vmfpround (_src.info128.VT (bcast128 addr:$src)),
                            _dst.info128.ImmAllZerosV, maskRC128:$mask),
              (!cast<Instruction>(NAME # "Z128rmbkz") maskRC128:$mask, addr:$src)>;
  }

  // "x"-suffixed aliases for the Z128 forms (registered for the "att" variant).
  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;

  // "y"-suffixed aliases for the Z256 forms (registered for the "att" variant).
  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
}

defm VCVTPD2PS : avx512_cvt_trunc<0x5A, "vcvtpd2ps",
                                  avx512vl_f32_info, avx512vl_f64_info, SchedWriteCvtPD2PS>,
                                  VEX_W, PD, EVEX_CD8<64, CD8VF>;
defm VCVTPS2PD : avx512_cvt_extend<0x5A, "vcvtps2pd",
                                   avx512vl_f64_info, avx512vl_f32_info, SchedWriteCvtPS2PD>,
                                   PS, EVEX_CD8<32, CD8VH>;

// Extend Half to Double
multiclass avx512_cvtph2pd<bits<8> opc, string OpcodeStr,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasFP16] in {
    defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, v8f64_info, v8f16x_info,
                                  any_fpextend, fpextend, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f16x_info,
                                X86vfpextSAE, sched.ZMM>, EVEX_V512;
    def : Pat<(v8f64 (extloadv8f16 addr:$src)),
              (!cast<Instruction>(NAME # "Zrm") addr:$src)>;
  }
  let Predicates = [HasFP16, HasVLX] in {
    defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, v2f64x_info, v8f16x_info,
                                     X86any_vfpext, X86vfpext, sched.XMM, "{1to2}", "",
                                     f32mem>, EVEX_V128;
    defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v8f16x_info,
                                     X86any_vfpext, X86vfpext, sched.YMM, "{1to4}", "",
                                     f64mem>, EVEX_V256;
  }
}

// Truncate Double to Half
// The Z128/Z256 instruction patterns are disabled with null_frag here.
multiclass avx512_cvtpd2ph<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> {
  let Predicates = [HasFP16] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v8f64_info,
                            X86any_vfpround, X86vfpround, sched.ZMM, "{1to8}", "{z}">,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f16x_info, v8f64_info,
                               X86vfproundRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasFP16, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v2f64x_info, null_frag,
                               null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
                               VK2WM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v4f64x_info, null_frag,
                               null_frag, sched.YMM, "{1to4}", "{y}", f256mem,
                               VK4WM>, EVEX_V256;
  }
  // "x"-suffixed aliases for the Z128 forms.
  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
                  VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
                  i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
                  VK2WM:$mask, i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                  VK2WM:$mask, i64mem:$src), 0, "att">;

  // "y"-suffixed aliases for the Z256 forms.
  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
                  VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
                  "$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
                  i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
                  VK4WM:$mask, i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
                  VK4WM:$mask, i64mem:$src), 0, "att">;

  // "z"-suffixed aliases for the 512-bit (Z) forms.
  def : InstAlias<OpcodeStr#"z\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Zrr") VR128X:$dst,
                  VR512:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}}|"
                  "$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Zrrk") VR128X:$dst,
                  VK8WM:$mask, VR512:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Zrrkz") VR128X:$dst,
                  VK8WM:$mask, VR512:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst|$dst, ${src}{1to8}}",
                  (!cast<Instruction>(NAME # "Zrmb") VR128X:$dst,
                  i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to8}}",
                  (!cast<Instruction>(NAME # "Zrmbk") VR128X:$dst,
                  VK8WM:$mask, i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to8}}",
                  (!cast<Instruction>(NAME # "Zrmbkz") VR128X:$dst,
                  VK8WM:$mask, i64mem:$src), 0, "att">;
}

defm VCVTPS2PHX : avx512_cvt_trunc<0x1D, "vcvtps2phx", avx512vl_f16_info,
                                   avx512vl_f32_info, SchedWriteCvtPD2PS,
                                   HasFP16>, T_MAP5PD, EVEX_CD8<32, CD8VF>;
defm VCVTPH2PSX : avx512_cvt_extend<0x13, "vcvtph2psx", avx512vl_f32_info,
                                    avx512vl_f16_info, SchedWriteCvtPS2PD,
                                    HasFP16>, T_MAP6PD, EVEX_CD8<16, CD8VH>;
defm VCVTPD2PH : avx512_cvtpd2ph<0x5A, "vcvtpd2ph", SchedWriteCvtPD2PS>,
                                 VEX_W, T_MAP5PD, EVEX_CD8<64, CD8VF>;
defm VCVTPH2PD : avx512_cvtph2pd<0x5A, "vcvtph2pd", SchedWriteCvtPS2PD>,
                                 T_MAP5PS, EVEX_CD8<16, CD8VQ>;

let Predicates = [HasFP16, HasVLX] in {
  // Special patterns to allow use of X86vmfpround for masking. Instruction
  // patterns have been disabled with null_frag.
  def : Pat<(v8f16 (X86any_vfpround (v4f64 VR256X:$src))),
            (VCVTPD2PHZ256rr VR256X:$src)>;
  def : Pat<(v8f16 (X86vmfpround (v4f64 VR256X:$src), (v8f16 VR128X:$src0),
                          VK4WM:$mask)),
            (VCVTPD2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
  def : Pat<(X86vmfpround (v4f64 VR256X:$src), v8f16x_info.ImmAllZerosV,
                          VK4WM:$mask),
            (VCVTPD2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;

  def : Pat<(v8f16 (X86any_vfpround (loadv4f64 addr:$src))),
            (VCVTPD2PHZ256rm addr:$src)>;
  def : Pat<(X86vmfpround (loadv4f64 addr:$src), (v8f16 VR128X:$src0),
                          VK4WM:$mask),
            (VCVTPD2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86vmfpround (loadv4f64 addr:$src), v8f16x_info.ImmAllZerosV,
                          VK4WM:$mask),
            (VCVTPD2PHZ256rmkz VK4WM:$mask, addr:$src)>;

  def : Pat<(v8f16 (X86any_vfpround (v4f64 (X86VBroadcastld64 addr:$src)))),
            (VCVTPD2PHZ256rmb addr:$src)>;
  def : Pat<(X86vmfpround (v4f64 (X86VBroadcastld64 addr:$src)),
                          (v8f16 VR128X:$src0), VK4WM:$mask),
            (VCVTPD2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86vmfpround (v4f64 (X86VBroadcastld64 addr:$src)),
                          v8f16x_info.ImmAllZerosV, VK4WM:$mask),
            (VCVTPD2PHZ256rmbkz VK4WM:$mask, addr:$src)>;

  def : Pat<(v8f16 (X86any_vfpround (v2f64 VR128X:$src))),
            (VCVTPD2PHZ128rr VR128X:$src)>;
  def : Pat<(X86vmfpround (v2f64 VR128X:$src), (v8f16 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTPD2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86vmfpround (v2f64 VR128X:$src), v8f16x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTPD2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;

  def : Pat<(v8f16 (X86any_vfpround (loadv2f64 addr:$src))),
            (VCVTPD2PHZ128rm addr:$src)>;
  def : Pat<(X86vmfpround (loadv2f64 addr:$src), (v8f16 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTPD2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86vmfpround (loadv2f64 addr:$src), v8f16x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTPD2PHZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v8f16 (X86any_vfpround (v2f64 (X86VBroadcastld64 addr:$src)))),
            (VCVTPD2PHZ128rmb addr:$src)>;
  def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
                          (v8f16 VR128X:$src0), VK2WM:$mask),
            (VCVTPD2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
                          v8f16x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTPD2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
}

// Convert Signed/Unsigned Doubleword to Double
let Uses = []<Register>, mayRaiseFPException = 0 in
multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                           SDNode MaskOpNode, SDPatternOperator OpNode128,
                           SDNode MaskOpNode128,
                           X86SchedWriteWidths sched> {
  // No rounding in this op
  let Predicates = [HasAVX512] in
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode,
                            MaskOpNode, sched.ZMM>, EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
                               OpNode128, MaskOpNode128, sched.XMM, "{1to2}",
                               "", i64mem, VK2WM,
                               (v2f64 (OpNode128 (bc_v4i32
                                (v2i64
                                 (scalar_to_vector (loadi64 addr:$src)))))),
                               (v2f64 (MaskOpNode128 (bc_v4i32
                                (v2i64
                                 (scalar_to_vector (loadi64 addr:$src))))))>,
                               EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }
}

// Convert Signed/Unsigned Doubleword to Float
multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                           SDNode MaskOpNode, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode,
                               MaskOpNode, sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }
}

// Convert Float to Signed/Unsigned Doubleword with truncation
multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                            SDNode MaskOpNode,
                            SDNode OpNodeSAE, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
                                OpNodeSAE, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
                               MaskOpNode, sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }
}

// Convert Float to Signed/Unsigned Doubleword
multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode MaskOpNode, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
                               MaskOpNode, sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }
}

// Convert Double to Signed/Unsigned Doubleword with truncation
multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                            SDNode MaskOpNode, SDNode OpNodeSAE,
                            X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
                                OpNodeSAE, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    // we need "x"/"y" suffixes in order to distinguish between 128 and 256
    // memory forms of these instructions in Asm Parser. They have the same
    // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
    // due to the same reason.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
                               null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
                               VK2WM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
                               MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256;
  }

  // "x"-suffixed aliases for the Z128 forms.
  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
                  VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
                  f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;

  // "y"-suffixed aliases for the Z256 forms.
  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
                  VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
                  f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
}

// Convert Double to Signed/Unsigned Doubleword
multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode MaskOpNode, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    // we need "x"/"y" suffixes in order to distinguish between 128 and 256
    // memory forms of these instructions in Asm Parser. They have the same
    // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
    // due to the same reason.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
                               null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
                               VK2WM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
                               MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256;
  }

  // "x"-suffixed aliases for the Z128 forms.
  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
                  f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;

  // "y"-suffixed aliases for the Z256 forms.
  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
                  f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
}

// Convert Double to Signed/Unsigned Quadword
multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode MaskOpNode, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
                               MaskOpNode, sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }
}

// Convert Double to Signed/Unsigned Quadword with truncation
// Note: the parameter named OpNodeRnd is passed to avx512_vcvt_fp_sae here.
multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                            SDNode MaskOpNode, SDNode OpNodeRnd,
                            X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
                                OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
                               MaskOpNode, sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }
}

// Convert Signed/Unsigned Quadword to Double
multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                           SDNode MaskOpNode, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode,
                               MaskOpNode, sched.XMM>, EVEX_V128, NotEVEX2VEXConvertible;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256, NotEVEX2VEXConvertible;
  }
}

// Convert Float to Signed/Unsigned Quadword
multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode MaskOpNode, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    // Explicitly specified broadcast string, since we take only 2 elements
    // from v4f32x_info source
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
                               MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM,
                               (v2i64 (OpNode (bc_v4f32
                                (v2f64
                                 (scalar_to_vector (loadf64 addr:$src)))))),
                               (v2i64 (MaskOpNode (bc_v4f32
                                (v2f64
                                 (scalar_to_vector (loadf64 addr:$src))))))>,
                               EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }
}

//
// Convert Float to Signed/Unsigned Quadword with truncation
// Note: the parameter named OpNodeRnd is passed to avx512_vcvt_fp_sae here.
multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                            SDNode MaskOpNode, SDNode OpNodeRnd,
                            X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
                                OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    // Explicitly specified broadcast string, since we take only 2 elements
    // from v4f32x_info source
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
                               MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM,
                               (v2i64 (OpNode (bc_v4f32
                                (v2f64
                                 (scalar_to_vector (loadf64 addr:$src)))))),
                               (v2i64 (MaskOpNode (bc_v4f32
                                (v2f64
                                 (scalar_to_vector (loadf64 addr:$src))))))>,
                               EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }
}

// Convert Signed/Unsigned Quadword to Float
// Also Convert Signed/Unsigned Doubleword to Half
// OpNode128 / OpNode128M are used only by the hand-written Z128 patterns below
// (unmasked and masked forms respectively).
multiclass avx512_cvtqq2ps_dq2ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                                 SDPatternOperator MaskOpNode, SDPatternOperator OpNode128,
                                 SDPatternOperator OpNode128M, SDPatternOperator OpNodeRnd,
                                 AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
                                 X86SchedWriteWidths sched, Predicate prd = HasDQI> {
  let Predicates = [prd] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, _dst.info256, _src.info512, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, _dst.info256, _src.info512,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    // we need "x"/"y" suffixes in order to distinguish between 128 and 256
    // memory forms of these instructions in Asm Parser. They have the same
    // dest type - '_dst.info128'. We also specify the broadcast string explicitly
    // due to the same reason.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info128, null_frag,
                               null_frag, sched.XMM, _src.info128.BroadcastStr,
                               "{x}", i128mem, _src.info128.KRCWM>,
                               EVEX_V128, NotEVEX2VEXConvertible;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info256, OpNode,
                               MaskOpNode, sched.YMM, _src.info256.BroadcastStr,
                               "{y}">, EVEX_V256,
                               NotEVEX2VEXConvertible;

    // Special patterns to allow use of X86VM[SU]intToFP for masking. Instruction
    // patterns have been disabled with null_frag.
    def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.VT VR128X:$src))),
              (!cast<Instruction>(NAME # "Z128rr") VR128X:$src)>;
    def : Pat<(OpNode128M (_src.info128.VT VR128X:$src), (_dst.info128.VT VR128X:$src0),
                             _src.info128.KRCWM:$mask),
              (!cast<Instruction>(NAME # "Z128rrk") VR128X:$src0, _src.info128.KRCWM:$mask, VR128X:$src)>;
    def : Pat<(OpNode128M (_src.info128.VT VR128X:$src), _dst.info128.ImmAllZerosV,
                             _src.info128.KRCWM:$mask),
              (!cast<Instruction>(NAME # "Z128rrkz") _src.info128.KRCWM:$mask, VR128X:$src)>;

    def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.LdFrag addr:$src))),
              (!cast<Instruction>(NAME # "Z128rm") addr:$src)>;
    def : Pat<(OpNode128M (_src.info128.LdFrag addr:$src), (_dst.info128.VT VR128X:$src0),
                             _src.info128.KRCWM:$mask),
              (!cast<Instruction>(NAME # "Z128rmk") VR128X:$src0, _src.info128.KRCWM:$mask, addr:$src)>;
    def : Pat<(OpNode128M (_src.info128.LdFrag addr:$src), _dst.info128.ImmAllZerosV,
                             _src.info128.KRCWM:$mask),
              (!cast<Instruction>(NAME # "Z128rmkz") _src.info128.KRCWM:$mask, addr:$src)>;

    def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.VT (X86VBroadcastld64 addr:$src)))),
              (!cast<Instruction>(NAME # "Z128rmb") addr:$src)>;
    def : Pat<(OpNode128M (_src.info128.VT (X86VBroadcastld64 addr:$src)),
                             (_dst.info128.VT VR128X:$src0), _src.info128.KRCWM:$mask),
              (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$src0, _src.info128.KRCWM:$mask, addr:$src)>;
    def : Pat<(OpNode128M (_src.info128.VT (X86VBroadcastld64 addr:$src)),
                             _dst.info128.ImmAllZerosV, _src.info128.KRCWM:$mask),
              (!cast<Instruction>(NAME # "Z128rmbkz") _src.info128.KRCWM:$mask, addr:$src)>;
  }

  // "x"-suffixed aliases for the Z128 forms.
  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
                  VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
                  i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
                  VK2WM:$mask, i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                  VK2WM:$mask, i64mem:$src), 0, "att">;

  // "y"-suffixed aliases for the Z256 forms.
  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
                  VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
                  "$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
                  i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
                  VK4WM:$mask, i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
                  VK4WM:$mask, i64mem:$src), 0, "att">;
}

defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", any_sint_to_fp, sint_to_fp,
                                 X86any_VSintToFP, X86VSintToFP,
                                 SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>;

defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", any_sint_to_fp, sint_to_fp,
                                 X86VSintToFpRnd, SchedWriteCvtDQ2PS>,
                                 PS, EVEX_CD8<32, CD8VF>;

defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86any_cvttp2si,
                                   X86cvttp2si, X86cvttp2siSAE,
                                   SchedWriteCvtPS2DQ>, XS, EVEX_CD8<32, CD8VF>;

defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86any_cvttp2si,
                                   X86cvttp2si, X86cvttp2siSAE,
                                   SchedWriteCvtPD2DQ>,
                                   PD, VEX_W, EVEX_CD8<64, CD8VF>;

defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86any_cvttp2ui,
                                    X86cvttp2ui, X86cvttp2uiSAE,
                                    SchedWriteCvtPS2DQ>, PS, EVEX_CD8<32, CD8VF>;

defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86any_cvttp2ui,
                                    X86cvttp2ui, X86cvttp2uiSAE,
                                    SchedWriteCvtPD2DQ>,
                                    PS, VEX_W, EVEX_CD8<64, CD8VF>;

defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", any_uint_to_fp,
                                  uint_to_fp, X86any_VUintToFP, X86VUintToFP,
                                  SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>;

defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", any_uint_to_fp,
                                  uint_to_fp, X86VUintToFpRnd,
                                  SchedWriteCvtDQ2PS>, XD, EVEX_CD8<32, CD8VF>;

defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int, X86cvtp2Int,
                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
                                 EVEX_CD8<32, CD8VF>;

defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int, X86cvtp2Int,
                                 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, XD,
                                 VEX_W, EVEX_CD8<64, CD8VF>;

defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt, X86cvtp2UInt,
                                  X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>,
                                  PS, EVEX_CD8<32, CD8VF>;

defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt, X86cvtp2UInt,
                                  X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
                                  PS, EVEX_CD8<64, CD8VF>;

defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int, X86cvtp2Int,
                                 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, VEX_W,
                                 PD, EVEX_CD8<64, CD8VF>;

defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int, X86cvtp2Int,
                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
                                 EVEX_CD8<32, CD8VH>;

defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt, X86cvtp2UInt,
                                  X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
                                  PD, EVEX_CD8<64, CD8VF>;

defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt, X86cvtp2UInt,
                                  X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, PD,
                                  EVEX_CD8<32, CD8VH>;

defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86any_cvttp2si,
                                   X86cvttp2si, X86cvttp2siSAE,
                                   SchedWriteCvtPD2DQ>, VEX_W,
                                   PD, EVEX_CD8<64, CD8VF>;

defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86any_cvttp2si,
                                   X86cvttp2si, X86cvttp2siSAE,
                                   SchedWriteCvtPS2DQ>, PD,
                                   EVEX_CD8<32, CD8VH>;

defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86any_cvttp2ui,
                                    X86cvttp2ui, X86cvttp2uiSAE,
                                    SchedWriteCvtPD2DQ>, VEX_W,
                                    PD, EVEX_CD8<64, CD8VF>;

defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86any_cvttp2ui,
8867 X86cvttp2ui, X86cvttp2uiSAE, 8868 SchedWriteCvtPS2DQ>, PD, 8869 EVEX_CD8<32, CD8VH>; 8870 8871defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", any_sint_to_fp, 8872 sint_to_fp, X86VSintToFpRnd, 8873 SchedWriteCvtDQ2PD>, VEX_W, XS, EVEX_CD8<64, CD8VF>; 8874 8875defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", any_uint_to_fp, 8876 uint_to_fp, X86VUintToFpRnd, SchedWriteCvtDQ2PD>, 8877 VEX_W, XS, EVEX_CD8<64, CD8VF>; 8878 8879defm VCVTDQ2PH : avx512_cvtqq2ps_dq2ph<0x5B, "vcvtdq2ph", any_sint_to_fp, sint_to_fp, 8880 X86any_VSintToFP, X86VMSintToFP, 8881 X86VSintToFpRnd, avx512vl_f16_info, avx512vl_i32_info, 8882 SchedWriteCvtDQ2PS, HasFP16>, 8883 T_MAP5PS, EVEX_CD8<32, CD8VF>; 8884 8885defm VCVTUDQ2PH : avx512_cvtqq2ps_dq2ph<0x7A, "vcvtudq2ph", any_uint_to_fp, uint_to_fp, 8886 X86any_VUintToFP, X86VMUintToFP, 8887 X86VUintToFpRnd, avx512vl_f16_info, avx512vl_i32_info, 8888 SchedWriteCvtDQ2PS, HasFP16>, T_MAP5XD, 8889 EVEX_CD8<32, CD8VF>; 8890 8891defm VCVTQQ2PS : avx512_cvtqq2ps_dq2ph<0x5B, "vcvtqq2ps", any_sint_to_fp, sint_to_fp, 8892 X86any_VSintToFP, X86VMSintToFP, 8893 X86VSintToFpRnd, avx512vl_f32_info, avx512vl_i64_info, 8894 SchedWriteCvtDQ2PS>, VEX_W, PS, 8895 EVEX_CD8<64, CD8VF>; 8896 8897defm VCVTUQQ2PS : avx512_cvtqq2ps_dq2ph<0x7A, "vcvtuqq2ps", any_uint_to_fp, uint_to_fp, 8898 X86any_VUintToFP, X86VMUintToFP, 8899 X86VUintToFpRnd, avx512vl_f32_info, avx512vl_i64_info, 8900 SchedWriteCvtDQ2PS>, VEX_W, XD, 8901 EVEX_CD8<64, CD8VF>; 8902 8903let Predicates = [HasVLX] in { 8904 // Special patterns to allow use of X86mcvtp2Int for masking. Instruction 8905 // patterns have been disabled with null_frag. 
// Register source (rr forms): the plain node, then the masked node with a
// v4i32 $src0 operand (rrk) and with an all-zeros vector (rrkz).
def : Pat<(v4i32 (X86cvtp2Int (v2f64 VR128X:$src))),
          (VCVTPD2DQZ128rr VR128X:$src)>;
def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src),
                        (v4i32 VR128X:$src0),
                        VK2WM:$mask),
          (VCVTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src),
                        v4i32x_info.ImmAllZerosV,
                        VK2WM:$mask),
          (VCVTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;

// v2f64 load source (rm forms): the same three variants over loadv2f64.
def : Pat<(v4i32 (X86cvtp2Int (loadv2f64 addr:$src))),
          (VCVTPD2DQZ128rm addr:$src)>;
def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src),
                        (v4i32 VR128X:$src0),
                        VK2WM:$mask),
          (VCVTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src),
                        v4i32x_info.ImmAllZerosV,
                        VK2WM:$mask),
          (VCVTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;

// Broadcast load source (rmb forms): the same three variants over a v2f64
// X86VBroadcastld64.
def : Pat<(v4i32 (X86cvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)))),
          (VCVTPD2DQZ128rmb addr:$src)>;
def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
                        (v4i32 VR128X:$src0),
                        VK2WM:$mask),
          (VCVTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
                        v4i32x_info.ImmAllZerosV,
                        VK2WM:$mask),
          (VCVTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;

// Explicit patterns so that X86mcvttp2si can be used for masking; the
// instruction patterns have been disabled with null_frag.
// Register source (rr forms): the plain node, then the masked node with a
// v4i32 $src0 operand (rrk) and with an all-zeros vector (rrkz).
def : Pat<(v4i32 (X86any_cvttp2si (v2f64 VR128X:$src))),
          (VCVTTPD2DQZ128rr VR128X:$src)>;
def : Pat<(X86mcvttp2si (v2f64 VR128X:$src),
                        (v4i32 VR128X:$src0),
                        VK2WM:$mask),
          (VCVTTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
def : Pat<(X86mcvttp2si (v2f64 VR128X:$src),
                        v4i32x_info.ImmAllZerosV,
                        VK2WM:$mask),
          (VCVTTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;

// v2f64 load source (rm forms): the same three variants over loadv2f64.
def : Pat<(v4i32 (X86any_cvttp2si (loadv2f64 addr:$src))),
          (VCVTTPD2DQZ128rm addr:$src)>;
def : Pat<(X86mcvttp2si (loadv2f64 addr:$src),
                        (v4i32 VR128X:$src0),
                        VK2WM:$mask),
          (VCVTTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(X86mcvttp2si (loadv2f64 addr:$src),
                        v4i32x_info.ImmAllZerosV,
                        VK2WM:$mask),
          (VCVTTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;

// Broadcast load source (rmb forms): the same three variants over a v2f64
// X86VBroadcastld64.
def : Pat<(v4i32 (X86any_cvttp2si (v2f64 (X86VBroadcastld64 addr:$src)))),
          (VCVTTPD2DQZ128rmb addr:$src)>;
def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
                        (v4i32 VR128X:$src0),
                        VK2WM:$mask),
          (VCVTTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
                        v4i32x_info.ImmAllZerosV,
                        VK2WM:$mask),
          (VCVTTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;

// Explicit patterns so that X86mcvtp2UInt can be used for masking; the
// instruction patterns have been disabled with null_frag.
// Register source (rr forms): the plain node, then the masked node with a
// v4i32 $src0 operand (rrk) and with an all-zeros vector (rrkz).
def : Pat<(v4i32 (X86cvtp2UInt (v2f64 VR128X:$src))),
          (VCVTPD2UDQZ128rr VR128X:$src)>;
def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src),
                         (v4i32 VR128X:$src0),
                         VK2WM:$mask),
          (VCVTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src),
                         v4i32x_info.ImmAllZerosV,
                         VK2WM:$mask),
          (VCVTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;

// v2f64 load source (rm forms): the same three variants over loadv2f64.
def : Pat<(v4i32 (X86cvtp2UInt (loadv2f64 addr:$src))),
          (VCVTPD2UDQZ128rm addr:$src)>;
def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src),
                         (v4i32 VR128X:$src0),
                         VK2WM:$mask),
          (VCVTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src),
                         v4i32x_info.ImmAllZerosV,
                         VK2WM:$mask),
          (VCVTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;

// Broadcast load source (rmb forms): the same three variants over a v2f64
// X86VBroadcastld64.
def : Pat<(v4i32 (X86cvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)))),
          (VCVTPD2UDQZ128rmb addr:$src)>;
def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
                         (v4i32 VR128X:$src0),
                         VK2WM:$mask),
          (VCVTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
                         v4i32x_info.ImmAllZerosV,
                         VK2WM:$mask),
          (VCVTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;

// Special patterns to allow use of X86mcvttp2ui for masking. Instruction
// patterns have been disabled with null_frag.
8993 def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 VR128X:$src))), 8994 (VCVTTPD2UDQZ128rr VR128X:$src)>; 8995 def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), (v4i32 VR128X:$src0), 8996 VK2WM:$mask), 8997 (VCVTTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>; 8998 def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV, 8999 VK2WM:$mask), 9000 (VCVTTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>; 9001 9002 def : Pat<(v4i32 (X86any_cvttp2ui (loadv2f64 addr:$src))), 9003 (VCVTTPD2UDQZ128rm addr:$src)>; 9004 def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), (v4i32 VR128X:$src0), 9005 VK2WM:$mask), 9006 (VCVTTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 9007 def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV, 9008 VK2WM:$mask), 9009 (VCVTTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>; 9010 9011 def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)))), 9012 (VCVTTPD2UDQZ128rmb addr:$src)>; 9013 def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)), 9014 (v4i32 VR128X:$src0), VK2WM:$mask), 9015 (VCVTTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>; 9016 def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)), 9017 v4i32x_info.ImmAllZerosV, VK2WM:$mask), 9018 (VCVTTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>; 9019} 9020 9021let Predicates = [HasDQI, HasVLX] in { 9022 def : Pat<(v2i64 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))), 9023 (VCVTPS2QQZ128rm addr:$src)>; 9024 def : Pat<(v2i64 (vselect_mask VK2WM:$mask, 9025 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), 9026 VR128X:$src0)), 9027 (VCVTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 9028 def : Pat<(v2i64 (vselect_mask VK2WM:$mask, 9029 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), 9030 v2i64x_info.ImmAllZerosV)), 9031 (VCVTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>; 9032 9033 def : Pat<(v2i64 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))), 9034 (VCVTPS2UQQZ128rm addr:$src)>; 9035 def : Pat<(v2i64 
(vselect_mask VK2WM:$mask, 9036 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), 9037 VR128X:$src0)), 9038 (VCVTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 9039 def : Pat<(v2i64 (vselect_mask VK2WM:$mask, 9040 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), 9041 v2i64x_info.ImmAllZerosV)), 9042 (VCVTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>; 9043 9044 def : Pat<(v2i64 (X86any_cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))), 9045 (VCVTTPS2QQZ128rm addr:$src)>; 9046 def : Pat<(v2i64 (vselect_mask VK2WM:$mask, 9047 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), 9048 VR128X:$src0)), 9049 (VCVTTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 9050 def : Pat<(v2i64 (vselect_mask VK2WM:$mask, 9051 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), 9052 v2i64x_info.ImmAllZerosV)), 9053 (VCVTTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>; 9054 9055 def : Pat<(v2i64 (X86any_cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))), 9056 (VCVTTPS2UQQZ128rm addr:$src)>; 9057 def : Pat<(v2i64 (vselect_mask VK2WM:$mask, 9058 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), 9059 VR128X:$src0)), 9060 (VCVTTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 9061 def : Pat<(v2i64 (vselect_mask VK2WM:$mask, 9062 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), 9063 v2i64x_info.ImmAllZerosV)), 9064 (VCVTTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>; 9065} 9066 9067let Predicates = [HasVLX] in { 9068 def : Pat<(v2f64 (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))), 9069 (VCVTDQ2PDZ128rm addr:$src)>; 9070 def : Pat<(v2f64 (vselect_mask VK2WM:$mask, 9071 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))), 9072 VR128X:$src0)), 9073 (VCVTDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 9074 def : Pat<(v2f64 (vselect_mask VK2WM:$mask, 9075 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))), 9076 v2f64x_info.ImmAllZerosV)), 9077 (VCVTDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>; 9078 9079 def : 
Pat<(v2f64 (X86any_VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))), 9080 (VCVTUDQ2PDZ128rm addr:$src)>; 9081 def : Pat<(v2f64 (vselect_mask VK2WM:$mask, 9082 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))), 9083 VR128X:$src0)), 9084 (VCVTUDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 9085 def : Pat<(v2f64 (vselect_mask VK2WM:$mask, 9086 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))), 9087 v2f64x_info.ImmAllZerosV)), 9088 (VCVTUDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>; 9089} 9090 9091//===----------------------------------------------------------------------===// 9092// Half precision conversion instructions 9093//===----------------------------------------------------------------------===// 9094 9095let Uses = [MXCSR], mayRaiseFPException = 1 in 9096multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src, 9097 X86MemOperand x86memop, dag ld_dag, 9098 X86FoldableSchedWrite sched> { 9099 defm rr : AVX512_maskable_split<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst), 9100 (ins _src.RC:$src), "vcvtph2ps", "$src", "$src", 9101 (X86any_cvtph2ps (_src.VT _src.RC:$src)), 9102 (X86cvtph2ps (_src.VT _src.RC:$src))>, 9103 T8PD, Sched<[sched]>; 9104 defm rm : AVX512_maskable_split<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst), 9105 (ins x86memop:$src), "vcvtph2ps", "$src", "$src", 9106 (X86any_cvtph2ps (_src.VT ld_dag)), 9107 (X86cvtph2ps (_src.VT ld_dag))>, 9108 T8PD, Sched<[sched.Folded]>; 9109} 9110 9111multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src, 9112 X86FoldableSchedWrite sched> { 9113 let Uses = [MXCSR] in 9114 defm rrb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst), 9115 (ins _src.RC:$src), "vcvtph2ps", 9116 "{sae}, $src", "$src, {sae}", 9117 (X86cvtph2psSAE (_src.VT _src.RC:$src))>, 9118 T8PD, EVEX_B, Sched<[sched]>; 9119} 9120 9121let Predicates = [HasAVX512] in 9122 defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem, 9123 (load addr:$src), WriteCvtPH2PSZ>, 
9124 avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSZ>, 9125 EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>; 9126 9127let Predicates = [HasVLX] in { 9128 defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem, 9129 (load addr:$src), WriteCvtPH2PSY>, EVEX, EVEX_V256, 9130 EVEX_CD8<32, CD8VH>; 9131 defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem, 9132 (bitconvert (v2i64 (X86vzload64 addr:$src))), 9133 WriteCvtPH2PS>, EVEX, EVEX_V128, 9134 EVEX_CD8<32, CD8VH>; 9135 9136 // Pattern match vcvtph2ps of a scalar i64 load. 9137 def : Pat<(v4f32 (X86any_cvtph2ps (v8i16 (bitconvert 9138 (v2i64 (scalar_to_vector (loadi64 addr:$src))))))), 9139 (VCVTPH2PSZ128rm addr:$src)>; 9140} 9141 9142multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src, 9143 X86MemOperand x86memop, SchedWrite RR, SchedWrite MR> { 9144let ExeDomain = GenericDomain, Uses = [MXCSR], mayRaiseFPException = 1 in { 9145 def rr : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst), 9146 (ins _src.RC:$src1, i32u8imm:$src2), 9147 "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", 9148 [(set _dest.RC:$dst, 9149 (X86any_cvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>, 9150 Sched<[RR]>; 9151 let Constraints = "$src0 = $dst" in 9152 def rrk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst), 9153 (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2), 9154 "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", 9155 [(set _dest.RC:$dst, 9156 (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2), 9157 _dest.RC:$src0, _src.KRCWM:$mask))]>, 9158 Sched<[RR]>, EVEX_K; 9159 def rrkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst), 9160 (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2), 9161 "vcvtps2ph\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}", 9162 [(set _dest.RC:$dst, 9163 (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2), 9164 _dest.ImmAllZerosV, _src.KRCWM:$mask))]>, 9165 
Sched<[RR]>, EVEX_KZ; 9166 let hasSideEffects = 0, mayStore = 1 in { 9167 def mr : AVX512AIi8<0x1D, MRMDestMem, (outs), 9168 (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2), 9169 "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, 9170 Sched<[MR]>; 9171 def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs), 9172 (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2), 9173 "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", []>, 9174 EVEX_K, Sched<[MR]>, NotMemoryFoldable; 9175 } 9176} 9177} 9178 9179multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src, 9180 SchedWrite Sched> { 9181 let hasSideEffects = 0, Uses = [MXCSR] in 9182 defm rrb : AVX512_maskable_in_asm<0x1D, MRMDestReg, _dest, 9183 (outs _dest.RC:$dst), 9184 (ins _src.RC:$src1, i32u8imm:$src2), 9185 "vcvtps2ph", "$src2, {sae}, $src1", "$src1, {sae}, $src2", []>, 9186 EVEX_B, AVX512AIi8Base, Sched<[Sched]>; 9187} 9188 9189let Predicates = [HasAVX512] in { 9190 defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem, 9191 WriteCvtPS2PHZ, WriteCvtPS2PHZSt>, 9192 avx512_cvtps2ph_sae<v16i16x_info, v16f32_info, WriteCvtPS2PHZ>, 9193 EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>; 9194 9195 def : Pat<(store (v16i16 (X86any_cvtps2ph VR512:$src1, timm:$src2)), addr:$dst), 9196 (VCVTPS2PHZmr addr:$dst, VR512:$src1, timm:$src2)>; 9197} 9198 9199let Predicates = [HasVLX] in { 9200 defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem, 9201 WriteCvtPS2PHY, WriteCvtPS2PHYSt>, 9202 EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>; 9203 defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem, 9204 WriteCvtPS2PH, WriteCvtPS2PHSt>, 9205 EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>; 9206 9207 def : Pat<(store (f64 (extractelt 9208 (bc_v2f64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))), 9209 (iPTR 0))), addr:$dst), 9210 (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>; 9211 def : Pat<(store (i64 (extractelt 9212 (bc_v2i64 (v8i16 
(X86any_cvtps2ph VR128X:$src1, timm:$src2))), 9213 (iPTR 0))), addr:$dst), 9214 (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>; 9215 def : Pat<(store (v8i16 (X86any_cvtps2ph VR256X:$src1, timm:$src2)), addr:$dst), 9216 (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, timm:$src2)>; 9217} 9218 9219// Unordered/Ordered scalar fp compare with Sae and set EFLAGS 9220multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _, 9221 string OpcodeStr, Domain d, 9222 X86FoldableSchedWrite sched = WriteFComX> { 9223 let ExeDomain = d, hasSideEffects = 0, Uses = [MXCSR] in 9224 def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2), 9225 !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"), []>, 9226 EVEX, EVEX_B, VEX_LIG, EVEX_V128, Sched<[sched]>; 9227} 9228 9229let Defs = [EFLAGS], Predicates = [HasAVX512] in { 9230 defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", SSEPackedSingle>, 9231 AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>; 9232 defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", SSEPackedDouble>, 9233 AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>; 9234 defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", SSEPackedSingle>, 9235 AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>; 9236 defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", SSEPackedDouble>, 9237 AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>; 9238} 9239 9240let Defs = [EFLAGS], Predicates = [HasAVX512] in { 9241 defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86any_fcmp, f32, f32mem, loadf32, 9242 "ucomiss", SSEPackedSingle>, PS, EVEX, VEX_LIG, 9243 EVEX_CD8<32, CD8VT1>; 9244 defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86any_fcmp, f64, f64mem, loadf64, 9245 "ucomisd", SSEPackedDouble>, PD, EVEX, 9246 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; 9247 defm VCOMISSZ : sse12_ord_cmp<0x2F, FR32X, X86strict_fcmps, f32, f32mem, loadf32, 9248 "comiss", SSEPackedSingle>, PS, EVEX, VEX_LIG, 9249 EVEX_CD8<32, CD8VT1>; 9250 defm VCOMISDZ : 
sse12_ord_cmp<0x2F, FR64X, X86strict_fcmps, f64, f64mem, loadf64, 9251 "comisd", SSEPackedDouble>, PD, EVEX, 9252 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; 9253 let isCodeGenOnly = 1 in { 9254 defm VUCOMISSZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem, 9255 sse_load_f32, "ucomiss", SSEPackedSingle>, PS, EVEX, VEX_LIG, 9256 EVEX_CD8<32, CD8VT1>; 9257 defm VUCOMISDZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem, 9258 sse_load_f64, "ucomisd", SSEPackedDouble>, PD, EVEX, 9259 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; 9260 9261 defm VCOMISSZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem, 9262 sse_load_f32, "comiss", SSEPackedSingle>, PS, EVEX, VEX_LIG, 9263 EVEX_CD8<32, CD8VT1>; 9264 defm VCOMISDZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem, 9265 sse_load_f64, "comisd", SSEPackedDouble>, PD, EVEX, 9266 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; 9267 } 9268} 9269 9270let Defs = [EFLAGS], Predicates = [HasFP16] in { 9271 defm VUCOMISHZ : avx512_ord_cmp_sae<0x2E, v8f16x_info, "vucomish", 9272 SSEPackedSingle>, AVX512PSIi8Base, T_MAP5PS, 9273 EVEX_CD8<16, CD8VT1>; 9274 defm VCOMISHZ : avx512_ord_cmp_sae<0x2F, v8f16x_info, "vcomish", 9275 SSEPackedSingle>, AVX512PSIi8Base, T_MAP5PS, 9276 EVEX_CD8<16, CD8VT1>; 9277 defm VUCOMISHZ : sse12_ord_cmp<0x2E, FR16X, X86any_fcmp, f16, f16mem, loadf16, 9278 "ucomish", SSEPackedSingle>, T_MAP5PS, EVEX, 9279 VEX_LIG, EVEX_CD8<16, CD8VT1>; 9280 defm VCOMISHZ : sse12_ord_cmp<0x2F, FR16X, X86strict_fcmps, f16, f16mem, loadf16, 9281 "comish", SSEPackedSingle>, T_MAP5PS, EVEX, 9282 VEX_LIG, EVEX_CD8<16, CD8VT1>; 9283 let isCodeGenOnly = 1 in { 9284 defm VUCOMISHZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v8f16, shmem, 9285 sse_load_f16, "ucomish", SSEPackedSingle>, 9286 T_MAP5PS, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>; 9287 9288 defm VCOMISHZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v8f16, shmem, 9289 sse_load_f16, "comish", SSEPackedSingle>, 9290 T_MAP5PS, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>; 9291 } 9292} 
9293 9294/// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd, rcpsh, rsqrtsh 9295multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode, 9296 X86FoldableSchedWrite sched, X86VectorVTInfo _, 9297 Predicate prd = HasAVX512> { 9298 let Predicates = [prd], ExeDomain = _.ExeDomain in { 9299 defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 9300 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 9301 "$src2, $src1", "$src1, $src2", 9302 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>, 9303 EVEX_4V, VEX_LIG, Sched<[sched]>; 9304 defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), 9305 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr, 9306 "$src2, $src1", "$src1, $src2", 9307 (OpNode (_.VT _.RC:$src1), 9308 (_.ScalarIntMemFrags addr:$src2))>, EVEX_4V, VEX_LIG, 9309 Sched<[sched.Folded, sched.ReadAfterFold]>; 9310} 9311} 9312 9313defm VRCPSHZ : avx512_fp14_s<0x4D, "vrcpsh", X86rcp14s, SchedWriteFRcp.Scl, 9314 f16x_info, HasFP16>, EVEX_CD8<16, CD8VT1>, 9315 T_MAP6PD; 9316defm VRSQRTSHZ : avx512_fp14_s<0x4F, "vrsqrtsh", X86rsqrt14s, 9317 SchedWriteFRsqrt.Scl, f16x_info, HasFP16>, 9318 EVEX_CD8<16, CD8VT1>, T_MAP6PD; 9319let Uses = [MXCSR] in { 9320defm VRCP14SSZ : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SchedWriteFRcp.Scl, 9321 f32x_info>, EVEX_CD8<32, CD8VT1>, 9322 T8PD; 9323defm VRCP14SDZ : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SchedWriteFRcp.Scl, 9324 f64x_info>, VEX_W, EVEX_CD8<64, CD8VT1>, 9325 T8PD; 9326defm VRSQRT14SSZ : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s, 9327 SchedWriteFRsqrt.Scl, f32x_info>, 9328 EVEX_CD8<32, CD8VT1>, T8PD; 9329defm VRSQRT14SDZ : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s, 9330 SchedWriteFRsqrt.Scl, f64x_info>, VEX_W, 9331 EVEX_CD8<64, CD8VT1>, T8PD; 9332} 9333 9334/// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd 9335multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode, 9336 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 9337 let ExeDomain = 
_.ExeDomain in { 9338 defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 9339 (ins _.RC:$src), OpcodeStr, "$src", "$src", 9340 (_.VT (OpNode _.RC:$src))>, EVEX, T8PD, 9341 Sched<[sched]>; 9342 defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 9343 (ins _.MemOp:$src), OpcodeStr, "$src", "$src", 9344 (OpNode (_.VT 9345 (bitconvert (_.LdFrag addr:$src))))>, EVEX, T8PD, 9346 Sched<[sched.Folded, sched.ReadAfterFold]>; 9347 defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 9348 (ins _.ScalarMemOp:$src), OpcodeStr, 9349 "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr, 9350 (OpNode (_.VT 9351 (_.BroadcastLdFrag addr:$src)))>, 9352 EVEX, T8PD, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 9353 } 9354} 9355 9356multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode, 9357 X86SchedWriteWidths sched> { 9358 let Uses = [MXCSR] in { 9359 defm 14PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"), OpNode, sched.ZMM, 9360 v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>; 9361 defm 14PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"), OpNode, sched.ZMM, 9362 v8f64_info>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; 9363 } 9364 let Predicates = [HasFP16] in 9365 defm PHZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"), OpNode, sched.ZMM, 9366 v32f16_info>, EVEX_V512, T_MAP6PD, EVEX_CD8<16, CD8VF>; 9367 9368 // Define only if AVX512VL feature is present. 
9369 let Predicates = [HasVLX], Uses = [MXCSR] in { 9370 defm 14PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"), 9371 OpNode, sched.XMM, v4f32x_info>, 9372 EVEX_V128, EVEX_CD8<32, CD8VF>; 9373 defm 14PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"), 9374 OpNode, sched.YMM, v8f32x_info>, 9375 EVEX_V256, EVEX_CD8<32, CD8VF>; 9376 defm 14PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"), 9377 OpNode, sched.XMM, v2f64x_info>, 9378 EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>; 9379 defm 14PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"), 9380 OpNode, sched.YMM, v4f64x_info>, 9381 EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>; 9382 } 9383 let Predicates = [HasFP16, HasVLX] in { 9384 defm PHZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"), 9385 OpNode, sched.XMM, v8f16x_info>, 9386 EVEX_V128, T_MAP6PD, EVEX_CD8<16, CD8VF>; 9387 defm PHZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"), 9388 OpNode, sched.YMM, v16f16x_info>, 9389 EVEX_V256, T_MAP6PD, EVEX_CD8<16, CD8VF>; 9390 } 9391} 9392 9393defm VRSQRT : avx512_fp14_p_vl_all<0x4E, "vrsqrt", X86rsqrt14, SchedWriteFRsqrt>; 9394defm VRCP : avx512_fp14_p_vl_all<0x4C, "vrcp", X86rcp14, SchedWriteFRcp>; 9395 9396/// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd 9397multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _, 9398 SDNode OpNode, SDNode OpNodeSAE, 9399 X86FoldableSchedWrite sched> { 9400 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in { 9401 defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 9402 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 9403 "$src2, $src1", "$src1, $src2", 9404 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>, 9405 Sched<[sched]>, SIMD_EXC; 9406 9407 defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 9408 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 9409 "{sae}, $src2, $src1", "$src1, $src2, {sae}", 9410 (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2))>, 9411 EVEX_B, Sched<[sched]>; 9412 9413 defm m : 
AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), 9414 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr, 9415 "$src2, $src1", "$src1, $src2", 9416 (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2))>, 9417 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; 9418 } 9419} 9420 9421multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode, 9422 SDNode OpNodeSAE, X86FoldableSchedWrite sched> { 9423 defm SSZ : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, OpNodeSAE, 9424 sched>, EVEX_CD8<32, CD8VT1>, VEX_LIG, T8PD, EVEX_4V; 9425 defm SDZ : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, OpNodeSAE, 9426 sched>, EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W, T8PD, EVEX_4V; 9427} 9428 9429multiclass avx512_vgetexpsh<bits<8> opc, string OpcodeStr, SDNode OpNode, 9430 SDNode OpNodeSAE, X86FoldableSchedWrite sched> { 9431 let Predicates = [HasFP16] in 9432 defm SHZ : avx512_fp28_s<opc, OpcodeStr#"sh", f16x_info, OpNode, OpNodeSAE, sched>, 9433 EVEX_CD8<16, CD8VT1>, T_MAP6PD, EVEX_4V; 9434} 9435 9436let Predicates = [HasERI] in { 9437 defm VRCP28 : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, X86rcp28SAEs, 9438 SchedWriteFRcp.Scl>; 9439 defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, X86rsqrt28SAEs, 9440 SchedWriteFRsqrt.Scl>; 9441} 9442 9443defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs, 9444 SchedWriteFRnd.Scl>, 9445 avx512_vgetexpsh<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs, 9446 SchedWriteFRnd.Scl>; 9447/// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd 9448 9449multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, 9450 SDNode OpNode, X86FoldableSchedWrite sched> { 9451 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in { 9452 defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 9453 (ins _.RC:$src), OpcodeStr, "$src", "$src", 9454 (OpNode (_.VT _.RC:$src))>, 9455 Sched<[sched]>; 9456 9457 defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs 
_.RC:$dst), 9458 (ins _.MemOp:$src), OpcodeStr, "$src", "$src", 9459 (OpNode (_.VT 9460 (bitconvert (_.LdFrag addr:$src))))>, 9461 Sched<[sched.Folded, sched.ReadAfterFold]>; 9462 9463 defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 9464 (ins _.ScalarMemOp:$src), OpcodeStr, 9465 "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr, 9466 (OpNode (_.VT 9467 (_.BroadcastLdFrag addr:$src)))>, 9468 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 9469 } 9470} 9471multiclass avx512_fp28_p_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, 9472 SDNode OpNode, X86FoldableSchedWrite sched> { 9473 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in 9474 defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 9475 (ins _.RC:$src), OpcodeStr, 9476 "{sae}, $src", "$src, {sae}", 9477 (OpNode (_.VT _.RC:$src))>, 9478 EVEX_B, Sched<[sched]>; 9479} 9480 9481multiclass avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode, 9482 SDNode OpNodeSAE, X86SchedWriteWidths sched> { 9483 defm PSZ : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>, 9484 avx512_fp28_p_sae<opc, OpcodeStr#"ps", v16f32_info, OpNodeSAE, sched.ZMM>, 9485 T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>; 9486 defm PDZ : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>, 9487 avx512_fp28_p_sae<opc, OpcodeStr#"pd", v8f64_info, OpNodeSAE, sched.ZMM>, 9488 T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; 9489} 9490 9491multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr, 9492 SDNode OpNode, X86SchedWriteWidths sched> { 9493 // Define only if AVX512VL feature is present. 
9494 let Predicates = [HasVLX] in { 9495 defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode, 9496 sched.XMM>, 9497 EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>; 9498 defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode, 9499 sched.YMM>, 9500 EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>; 9501 defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode, 9502 sched.XMM>, 9503 EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>; 9504 defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode, 9505 sched.YMM>, 9506 EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>; 9507 } 9508} 9509 9510multiclass avx512_vgetexp_fp16<bits<8> opc, string OpcodeStr, SDNode OpNode, 9511 SDNode OpNodeSAE, X86SchedWriteWidths sched> { 9512 let Predicates = [HasFP16] in 9513 defm PHZ : avx512_fp28_p<opc, OpcodeStr#"ph", v32f16_info, OpNode, sched.ZMM>, 9514 avx512_fp28_p_sae<opc, OpcodeStr#"ph", v32f16_info, OpNodeSAE, sched.ZMM>, 9515 T_MAP6PD, EVEX_V512, EVEX_CD8<16, CD8VF>; 9516 let Predicates = [HasFP16, HasVLX] in { 9517 defm PHZ128 : avx512_fp28_p<opc, OpcodeStr#"ph", v8f16x_info, OpNode, sched.XMM>, 9518 EVEX_V128, T_MAP6PD, EVEX_CD8<16, CD8VF>; 9519 defm PHZ256 : avx512_fp28_p<opc, OpcodeStr#"ph", v16f16x_info, OpNode, sched.YMM>, 9520 EVEX_V256, T_MAP6PD, EVEX_CD8<16, CD8VF>; 9521 } 9522} 9523let Predicates = [HasERI] in { 9524 defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, X86rsqrt28SAE, 9525 SchedWriteFRsqrt>, EVEX; 9526 defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28, X86rcp28SAE, 9527 SchedWriteFRcp>, EVEX; 9528 defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2, X86exp2SAE, 9529 SchedWriteFAdd>, EVEX; 9530} 9531defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE, 9532 SchedWriteFRnd>, 9533 avx512_vgetexp_fp16<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE, 9534 SchedWriteFRnd>, 9535 avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexp, 9536 SchedWriteFRnd>, EVEX; 9537 9538multiclass avx512_sqrt_packed_round<bits<8> opc, string 
OpcodeStr, 9539 X86FoldableSchedWrite sched, X86VectorVTInfo _>{ 9540 let ExeDomain = _.ExeDomain in 9541 defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 9542 (ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc", 9543 (_.VT (X86fsqrtRnd _.RC:$src, (i32 timm:$rc)))>, 9544 EVEX, EVEX_B, EVEX_RC, Sched<[sched]>; 9545} 9546 9547multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr, 9548 X86FoldableSchedWrite sched, X86VectorVTInfo _>{ 9549 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in { 9550 defm r: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst), 9551 (ins _.RC:$src), OpcodeStr, "$src", "$src", 9552 (_.VT (any_fsqrt _.RC:$src)), 9553 (_.VT (fsqrt _.RC:$src))>, EVEX, 9554 Sched<[sched]>; 9555 defm m: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst), 9556 (ins _.MemOp:$src), OpcodeStr, "$src", "$src", 9557 (any_fsqrt (_.VT (_.LdFrag addr:$src))), 9558 (fsqrt (_.VT (_.LdFrag addr:$src)))>, EVEX, 9559 Sched<[sched.Folded, sched.ReadAfterFold]>; 9560 defm mb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst), 9561 (ins _.ScalarMemOp:$src), OpcodeStr, 9562 "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr, 9563 (any_fsqrt (_.VT (_.BroadcastLdFrag addr:$src))), 9564 (fsqrt (_.VT (_.BroadcastLdFrag addr:$src)))>, 9565 EVEX, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 9566 } 9567} 9568 9569let Uses = [MXCSR], mayRaiseFPException = 1 in 9570multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr, 9571 X86SchedWriteSizes sched> { 9572 let Predicates = [HasFP16] in 9573 defm PHZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"), 9574 sched.PH.ZMM, v32f16_info>, 9575 EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>; 9576 let Predicates = [HasFP16, HasVLX] in { 9577 defm PHZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"), 9578 sched.PH.XMM, v8f16x_info>, 9579 EVEX_V128, T_MAP5PS, EVEX_CD8<16, CD8VF>; 9580 defm PHZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"), 9581 
sched.PH.YMM, v16f16x_info>, 9582 EVEX_V256, T_MAP5PS, EVEX_CD8<16, CD8VF>; 9583 } 9584 defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"), 9585 sched.PS.ZMM, v16f32_info>, 9586 EVEX_V512, PS, EVEX_CD8<32, CD8VF>; 9587 defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"), 9588 sched.PD.ZMM, v8f64_info>, 9589 EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>; 9590 // Define only if AVX512VL feature is present. 9591 let Predicates = [HasVLX] in { 9592 defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"), 9593 sched.PS.XMM, v4f32x_info>, 9594 EVEX_V128, PS, EVEX_CD8<32, CD8VF>; 9595 defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"), 9596 sched.PS.YMM, v8f32x_info>, 9597 EVEX_V256, PS, EVEX_CD8<32, CD8VF>; 9598 defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"), 9599 sched.PD.XMM, v2f64x_info>, 9600 EVEX_V128, VEX_W, PD, EVEX_CD8<64, CD8VF>; 9601 defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"), 9602 sched.PD.YMM, v4f64x_info>, 9603 EVEX_V256, VEX_W, PD, EVEX_CD8<64, CD8VF>; 9604 } 9605} 9606 9607let Uses = [MXCSR] in 9608multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr, 9609 X86SchedWriteSizes sched> { 9610 let Predicates = [HasFP16] in 9611 defm PHZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ph"), 9612 sched.PH.ZMM, v32f16_info>, 9613 EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>; 9614 defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"), 9615 sched.PS.ZMM, v16f32_info>, 9616 EVEX_V512, PS, EVEX_CD8<32, CD8VF>; 9617 defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"), 9618 sched.PD.ZMM, v8f64_info>, 9619 EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>; 9620} 9621 9622multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched, 9623 X86VectorVTInfo _, string Name, Predicate prd = HasAVX512> { 9624 let ExeDomain = _.ExeDomain, Predicates = [prd] in { 9625 defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, 
(outs _.RC:$dst), 9626 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 9627 "$src2, $src1", "$src1, $src2", 9628 (X86fsqrts (_.VT _.RC:$src1), 9629 (_.VT _.RC:$src2))>, 9630 Sched<[sched]>, SIMD_EXC; 9631 defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), 9632 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr, 9633 "$src2, $src1", "$src1, $src2", 9634 (X86fsqrts (_.VT _.RC:$src1), 9635 (_.ScalarIntMemFrags addr:$src2))>, 9636 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; 9637 let Uses = [MXCSR] in 9638 defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 9639 (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr, 9640 "$rc, $src2, $src1", "$src1, $src2, $rc", 9641 (X86fsqrtRnds (_.VT _.RC:$src1), 9642 (_.VT _.RC:$src2), 9643 (i32 timm:$rc))>, 9644 EVEX_B, EVEX_RC, Sched<[sched]>; 9645 9646 let isCodeGenOnly = 1, hasSideEffects = 0 in { 9647 def r : I<opc, MRMSrcReg, (outs _.FRC:$dst), 9648 (ins _.FRC:$src1, _.FRC:$src2), 9649 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, 9650 Sched<[sched]>, SIMD_EXC; 9651 let mayLoad = 1 in 9652 def m : I<opc, MRMSrcMem, (outs _.FRC:$dst), 9653 (ins _.FRC:$src1, _.ScalarMemOp:$src2), 9654 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, 9655 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; 9656 } 9657 } 9658 9659 let Predicates = [prd] in { 9660 def : Pat<(_.EltVT (any_fsqrt _.FRC:$src)), 9661 (!cast<Instruction>(Name#Zr) 9662 (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>; 9663 } 9664 9665 let Predicates = [prd, OptForSize] in { 9666 def : Pat<(_.EltVT (any_fsqrt (load addr:$src))), 9667 (!cast<Instruction>(Name#Zm) 9668 (_.EltVT (IMPLICIT_DEF)), addr:$src)>; 9669 } 9670} 9671 9672multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr, 9673 X86SchedWriteSizes sched> { 9674 defm SHZ : avx512_sqrt_scalar<opc, OpcodeStr#"sh", sched.PH.Scl, f16x_info, NAME#"SH", HasFP16>, 9675 EVEX_CD8<16, CD8VT1>, EVEX_4V, T_MAP5XS; 9676 defm SSZ : avx512_sqrt_scalar<opc, 
OpcodeStr#"ss", sched.PS.Scl, f32x_info, NAME#"SS">, 9677 EVEX_CD8<32, CD8VT1>, EVEX_4V, XS; 9678 defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", sched.PD.Scl, f64x_info, NAME#"SD">, 9679 EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W; 9680} 9681 9682defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, 9683 avx512_sqrt_packed_all_round<0x51, "vsqrt", SchedWriteFSqrtSizes>; 9684 9685defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, VEX_LIG; 9686 9687multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr, 9688 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 9689 let ExeDomain = _.ExeDomain in { 9690 defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 9691 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr, 9692 "$src3, $src2, $src1", "$src1, $src2, $src3", 9693 (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2), 9694 (i32 timm:$src3)))>, 9695 Sched<[sched]>, SIMD_EXC; 9696 9697 let Uses = [MXCSR] in 9698 defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 9699 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr, 9700 "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3", 9701 (_.VT (X86RndScalesSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2), 9702 (i32 timm:$src3)))>, EVEX_B, 9703 Sched<[sched]>; 9704 9705 defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), 9706 (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3), 9707 OpcodeStr, 9708 "$src3, $src2, $src1", "$src1, $src2, $src3", 9709 (_.VT (X86RndScales _.RC:$src1, 9710 (_.ScalarIntMemFrags addr:$src2), (i32 timm:$src3)))>, 9711 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; 9712 9713 let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in { 9714 def r : I<opc, MRMSrcReg, (outs _.FRC:$dst), 9715 (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3), 9716 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 9717 []>, Sched<[sched]>, SIMD_EXC; 9718 9719 
let mayLoad = 1 in 9720 def m : I<opc, MRMSrcMem, (outs _.FRC:$dst), 9721 (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3), 9722 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 9723 []>, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; 9724 } 9725 } 9726 9727 let Predicates = [HasAVX512] in { 9728 def : Pat<(X86any_VRndScale _.FRC:$src1, timm:$src2), 9729 (_.EltVT (!cast<Instruction>(NAME#r) (_.EltVT (IMPLICIT_DEF)), 9730 _.FRC:$src1, timm:$src2))>; 9731 } 9732 9733 let Predicates = [HasAVX512, OptForSize] in { 9734 def : Pat<(X86any_VRndScale (_.ScalarLdFrag addr:$src1), timm:$src2), 9735 (_.EltVT (!cast<Instruction>(NAME#m) (_.EltVT (IMPLICIT_DEF)), 9736 addr:$src1, timm:$src2))>; 9737 } 9738} 9739 9740let Predicates = [HasFP16] in 9741defm VRNDSCALESHZ : avx512_rndscale_scalar<0x0A, "vrndscalesh", 9742 SchedWriteFRnd.Scl, f16x_info>, 9743 AVX512PSIi8Base, TA, EVEX_4V, 9744 EVEX_CD8<16, CD8VT1>; 9745 9746defm VRNDSCALESSZ : avx512_rndscale_scalar<0x0A, "vrndscaless", 9747 SchedWriteFRnd.Scl, f32x_info>, 9748 AVX512AIi8Base, EVEX_4V, VEX_LIG, 9749 EVEX_CD8<32, CD8VT1>; 9750 9751defm VRNDSCALESDZ : avx512_rndscale_scalar<0x0B, "vrndscalesd", 9752 SchedWriteFRnd.Scl, f64x_info>, 9753 VEX_W, AVX512AIi8Base, EVEX_4V, VEX_LIG, 9754 EVEX_CD8<64, CD8VT1>; 9755 9756multiclass avx512_masked_scalar<SDNode OpNode, string OpcPrefix, SDNode Move, 9757 dag Mask, X86VectorVTInfo _, PatLeaf ZeroFP, 9758 dag OutMask, Predicate BasePredicate> { 9759 let Predicates = [BasePredicate] in { 9760 def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask, 9761 (OpNode (extractelt _.VT:$src2, (iPTR 0))), 9762 (extractelt _.VT:$dst, (iPTR 0))))), 9763 (!cast<Instruction>("V"#OpcPrefix#r_Intk) 9764 _.VT:$dst, OutMask, _.VT:$src2, _.VT:$src1)>; 9765 9766 def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask, 9767 (OpNode (extractelt _.VT:$src2, (iPTR 0))), 9768 ZeroFP))), 9769 (!cast<Instruction>("V"#OpcPrefix#r_Intkz) 9770 OutMask, 
_.VT:$src2, _.VT:$src1)>; 9771 } 9772} 9773 9774defm : avx512_masked_scalar<fsqrt, "SQRTSHZ", X86Movsh, 9775 (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v8f16x_info, 9776 fp16imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasFP16>; 9777defm : avx512_masked_scalar<fsqrt, "SQRTSSZ", X86Movss, 9778 (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v4f32x_info, 9779 fp32imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>; 9780defm : avx512_masked_scalar<fsqrt, "SQRTSDZ", X86Movsd, 9781 (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v2f64x_info, 9782 fp64imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>; 9783 9784 9785//------------------------------------------------- 9786// Integer truncate and extend operations 9787//------------------------------------------------- 9788 9789// PatFrags that contain a select and a truncate op. The take operands in the 9790// same order as X86vmtrunc, X86vmtruncs, X86vmtruncus. This allows us to pass 9791// either to the multiclasses. 9792def select_trunc : PatFrag<(ops node:$src, node:$src0, node:$mask), 9793 (vselect_mask node:$mask, 9794 (trunc node:$src), node:$src0)>; 9795def select_truncs : PatFrag<(ops node:$src, node:$src0, node:$mask), 9796 (vselect_mask node:$mask, 9797 (X86vtruncs node:$src), node:$src0)>; 9798def select_truncus : PatFrag<(ops node:$src, node:$src0, node:$mask), 9799 (vselect_mask node:$mask, 9800 (X86vtruncus node:$src), node:$src0)>; 9801 9802multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode, 9803 SDPatternOperator MaskNode, 9804 X86FoldableSchedWrite sched, X86VectorVTInfo SrcInfo, 9805 X86VectorVTInfo DestInfo, X86MemOperand x86memop> { 9806 let ExeDomain = DestInfo.ExeDomain in { 9807 def rr : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst), 9808 (ins SrcInfo.RC:$src), 9809 OpcodeStr # "\t{$src, $dst|$dst, $src}", 9810 [(set DestInfo.RC:$dst, 9811 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src))))]>, 9812 EVEX, Sched<[sched]>; 9813 let Constraints = 
"$src0 = $dst" in 9814 def rrk : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst), 9815 (ins DestInfo.RC:$src0, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src), 9816 OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", 9817 [(set DestInfo.RC:$dst, 9818 (MaskNode (SrcInfo.VT SrcInfo.RC:$src), 9819 (DestInfo.VT DestInfo.RC:$src0), 9820 SrcInfo.KRCWM:$mask))]>, 9821 EVEX, EVEX_K, Sched<[sched]>; 9822 def rrkz : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst), 9823 (ins SrcInfo.KRCWM:$mask, SrcInfo.RC:$src), 9824 OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}", 9825 [(set DestInfo.RC:$dst, 9826 (DestInfo.VT (MaskNode (SrcInfo.VT SrcInfo.RC:$src), 9827 DestInfo.ImmAllZerosV, SrcInfo.KRCWM:$mask)))]>, 9828 EVEX, EVEX_KZ, Sched<[sched]>; 9829 } 9830 9831 let mayStore = 1, hasSideEffects = 0, ExeDomain = DestInfo.ExeDomain in { 9832 def mr : AVX512XS8I<opc, MRMDestMem, (outs), 9833 (ins x86memop:$dst, SrcInfo.RC:$src), 9834 OpcodeStr # "\t{$src, $dst|$dst, $src}", []>, 9835 EVEX, Sched<[sched.Folded]>; 9836 9837 def mrk : AVX512XS8I<opc, MRMDestMem, (outs), 9838 (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src), 9839 OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", []>, 9840 EVEX, EVEX_K, Sched<[sched.Folded]>, NotMemoryFoldable; 9841 }//mayStore = 1, hasSideEffects = 0 9842} 9843 9844multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo, 9845 PatFrag truncFrag, PatFrag mtruncFrag, 9846 string Name> { 9847 9848 def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst), 9849 (!cast<Instruction>(Name#SrcInfo.ZSuffix#mr) 9850 addr:$dst, SrcInfo.RC:$src)>; 9851 9852 def : Pat<(mtruncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst, 9853 SrcInfo.KRCWM:$mask), 9854 (!cast<Instruction>(Name#SrcInfo.ZSuffix#mrk) 9855 addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>; 9856} 9857 9858multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128, 9859 SDNode OpNode256, SDNode OpNode512, 9860 SDPatternOperator MaskNode128, 
9861 SDPatternOperator MaskNode256, 9862 SDPatternOperator MaskNode512, 9863 X86FoldableSchedWrite sched, 9864 AVX512VLVectorVTInfo VTSrcInfo, 9865 X86VectorVTInfo DestInfoZ128, 9866 X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ, 9867 X86MemOperand x86memopZ128, X86MemOperand x86memopZ256, 9868 X86MemOperand x86memopZ, PatFrag truncFrag, 9869 PatFrag mtruncFrag, Predicate prd = HasAVX512>{ 9870 9871 let Predicates = [HasVLX, prd] in { 9872 defm Z128: avx512_trunc_common<opc, OpcodeStr, OpNode128, MaskNode128, sched, 9873 VTSrcInfo.info128, DestInfoZ128, x86memopZ128>, 9874 avx512_trunc_mr_lowering<VTSrcInfo.info128, truncFrag, 9875 mtruncFrag, NAME>, EVEX_V128; 9876 9877 defm Z256: avx512_trunc_common<opc, OpcodeStr, OpNode256, MaskNode256, sched, 9878 VTSrcInfo.info256, DestInfoZ256, x86memopZ256>, 9879 avx512_trunc_mr_lowering<VTSrcInfo.info256, truncFrag, 9880 mtruncFrag, NAME>, EVEX_V256; 9881 } 9882 let Predicates = [prd] in 9883 defm Z: avx512_trunc_common<opc, OpcodeStr, OpNode512, MaskNode512, sched, 9884 VTSrcInfo.info512, DestInfoZ, x86memopZ>, 9885 avx512_trunc_mr_lowering<VTSrcInfo.info512, truncFrag, 9886 mtruncFrag, NAME>, EVEX_V512; 9887} 9888 9889multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr, 9890 X86FoldableSchedWrite sched, PatFrag StoreNode, 9891 PatFrag MaskedStoreNode, SDNode InVecNode, 9892 SDPatternOperator InVecMaskNode> { 9893 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, InVecNode, 9894 InVecMaskNode, InVecMaskNode, InVecMaskNode, sched, 9895 avx512vl_i64_info, v16i8x_info, v16i8x_info, 9896 v16i8x_info, i16mem, i32mem, i64mem, StoreNode, 9897 MaskedStoreNode>, EVEX_CD8<8, CD8VO>; 9898} 9899 9900multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode, 9901 SDPatternOperator MaskNode, 9902 X86FoldableSchedWrite sched, PatFrag StoreNode, 9903 PatFrag MaskedStoreNode, SDNode InVecNode, 9904 SDPatternOperator InVecMaskNode> { 9905 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, 
OpNode, 9906 InVecMaskNode, InVecMaskNode, MaskNode, sched, 9907 avx512vl_i64_info, v8i16x_info, v8i16x_info, 9908 v8i16x_info, i32mem, i64mem, i128mem, StoreNode, 9909 MaskedStoreNode>, EVEX_CD8<16, CD8VQ>; 9910} 9911 9912multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode, 9913 SDPatternOperator MaskNode, 9914 X86FoldableSchedWrite sched, PatFrag StoreNode, 9915 PatFrag MaskedStoreNode, SDNode InVecNode, 9916 SDPatternOperator InVecMaskNode> { 9917 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode, 9918 InVecMaskNode, MaskNode, MaskNode, sched, 9919 avx512vl_i64_info, v4i32x_info, v4i32x_info, 9920 v8i32x_info, i64mem, i128mem, i256mem, StoreNode, 9921 MaskedStoreNode>, EVEX_CD8<32, CD8VH>; 9922} 9923 9924multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode, 9925 SDPatternOperator MaskNode, 9926 X86FoldableSchedWrite sched, PatFrag StoreNode, 9927 PatFrag MaskedStoreNode, SDNode InVecNode, 9928 SDPatternOperator InVecMaskNode> { 9929 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode, 9930 InVecMaskNode, InVecMaskNode, MaskNode, sched, 9931 avx512vl_i32_info, v16i8x_info, v16i8x_info, 9932 v16i8x_info, i32mem, i64mem, i128mem, StoreNode, 9933 MaskedStoreNode>, EVEX_CD8<8, CD8VQ>; 9934} 9935 9936multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode, 9937 SDPatternOperator MaskNode, 9938 X86FoldableSchedWrite sched, PatFrag StoreNode, 9939 PatFrag MaskedStoreNode, SDNode InVecNode, 9940 SDPatternOperator InVecMaskNode> { 9941 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode, 9942 InVecMaskNode, MaskNode, MaskNode, sched, 9943 avx512vl_i32_info, v8i16x_info, v8i16x_info, 9944 v16i16x_info, i64mem, i128mem, i256mem, StoreNode, 9945 MaskedStoreNode>, EVEX_CD8<16, CD8VH>; 9946} 9947 9948multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode, 9949 SDPatternOperator MaskNode, 9950 X86FoldableSchedWrite sched, PatFrag StoreNode, 9951 PatFrag 
MaskedStoreNode, SDNode InVecNode, 9952 SDPatternOperator InVecMaskNode> { 9953 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode, 9954 InVecMaskNode, MaskNode, MaskNode, sched, 9955 avx512vl_i16_info, v16i8x_info, v16i8x_info, 9956 v32i8x_info, i64mem, i128mem, i256mem, StoreNode, 9957 MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>; 9958} 9959 9960defm VPMOVQB : avx512_trunc_qb<0x32, "vpmovqb", 9961 WriteShuffle256, truncstorevi8, 9962 masked_truncstorevi8, X86vtrunc, X86vmtrunc>; 9963defm VPMOVSQB : avx512_trunc_qb<0x22, "vpmovsqb", 9964 WriteShuffle256, truncstore_s_vi8, 9965 masked_truncstore_s_vi8, X86vtruncs, 9966 X86vmtruncs>; 9967defm VPMOVUSQB : avx512_trunc_qb<0x12, "vpmovusqb", 9968 WriteShuffle256, truncstore_us_vi8, 9969 masked_truncstore_us_vi8, X86vtruncus, X86vmtruncus>; 9970 9971defm VPMOVQW : avx512_trunc_qw<0x34, "vpmovqw", trunc, select_trunc, 9972 WriteShuffle256, truncstorevi16, 9973 masked_truncstorevi16, X86vtrunc, X86vmtrunc>; 9974defm VPMOVSQW : avx512_trunc_qw<0x24, "vpmovsqw", X86vtruncs, select_truncs, 9975 WriteShuffle256, truncstore_s_vi16, 9976 masked_truncstore_s_vi16, X86vtruncs, 9977 X86vmtruncs>; 9978defm VPMOVUSQW : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus, 9979 select_truncus, WriteShuffle256, 9980 truncstore_us_vi16, masked_truncstore_us_vi16, 9981 X86vtruncus, X86vmtruncus>; 9982 9983defm VPMOVQD : avx512_trunc_qd<0x35, "vpmovqd", trunc, select_trunc, 9984 WriteShuffle256, truncstorevi32, 9985 masked_truncstorevi32, X86vtrunc, X86vmtrunc>; 9986defm VPMOVSQD : avx512_trunc_qd<0x25, "vpmovsqd", X86vtruncs, select_truncs, 9987 WriteShuffle256, truncstore_s_vi32, 9988 masked_truncstore_s_vi32, X86vtruncs, 9989 X86vmtruncs>; 9990defm VPMOVUSQD : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus, 9991 select_truncus, WriteShuffle256, 9992 truncstore_us_vi32, masked_truncstore_us_vi32, 9993 X86vtruncus, X86vmtruncus>; 9994 9995defm VPMOVDB : avx512_trunc_db<0x31, "vpmovdb", trunc, select_trunc, 9996 
WriteShuffle256, truncstorevi8, 9997 masked_truncstorevi8, X86vtrunc, X86vmtrunc>; 9998defm VPMOVSDB : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, select_truncs, 9999 WriteShuffle256, truncstore_s_vi8, 10000 masked_truncstore_s_vi8, X86vtruncs, 10001 X86vmtruncs>; 10002defm VPMOVUSDB : avx512_trunc_db<0x11, "vpmovusdb", X86vtruncus, 10003 select_truncus, WriteShuffle256, 10004 truncstore_us_vi8, masked_truncstore_us_vi8, 10005 X86vtruncus, X86vmtruncus>; 10006 10007defm VPMOVDW : avx512_trunc_dw<0x33, "vpmovdw", trunc, select_trunc, 10008 WriteShuffle256, truncstorevi16, 10009 masked_truncstorevi16, X86vtrunc, X86vmtrunc>; 10010defm VPMOVSDW : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, select_truncs, 10011 WriteShuffle256, truncstore_s_vi16, 10012 masked_truncstore_s_vi16, X86vtruncs, 10013 X86vmtruncs>; 10014defm VPMOVUSDW : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus, 10015 select_truncus, WriteShuffle256, 10016 truncstore_us_vi16, masked_truncstore_us_vi16, 10017 X86vtruncus, X86vmtruncus>; 10018 10019defm VPMOVWB : avx512_trunc_wb<0x30, "vpmovwb", trunc, select_trunc, 10020 WriteShuffle256, truncstorevi8, 10021 masked_truncstorevi8, X86vtrunc, 10022 X86vmtrunc>; 10023defm VPMOVSWB : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, select_truncs, 10024 WriteShuffle256, truncstore_s_vi8, 10025 masked_truncstore_s_vi8, X86vtruncs, 10026 X86vmtruncs>; 10027defm VPMOVUSWB : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus, 10028 select_truncus, WriteShuffle256, 10029 truncstore_us_vi8, masked_truncstore_us_vi8, 10030 X86vtruncus, X86vmtruncus>; 10031 10032let Predicates = [HasAVX512, NoVLX] in { 10033def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))), 10034 (v8i16 (EXTRACT_SUBREG 10035 (v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), 10036 VR256X:$src, sub_ymm)))), sub_xmm))>; 10037def: Pat<(v4i32 (trunc (v4i64 VR256X:$src))), 10038 (v4i32 (EXTRACT_SUBREG 10039 (v8i32 (VPMOVQDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), 10040 VR256X:$src, sub_ymm)))), 
sub_xmm))>; 10041} 10042 10043let Predicates = [HasBWI, NoVLX] in { 10044def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))), 10045 (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF), 10046 VR256X:$src, sub_ymm))), sub_xmm))>; 10047} 10048 10049// Without BWI we can't use vXi16/vXi8 vselect so we have to use vmtrunc nodes. 10050multiclass mtrunc_lowering<string InstrName, SDNode OpNode, 10051 X86VectorVTInfo DestInfo, 10052 X86VectorVTInfo SrcInfo> { 10053 def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src), 10054 DestInfo.RC:$src0, 10055 SrcInfo.KRCWM:$mask)), 10056 (!cast<Instruction>(InstrName#"rrk") DestInfo.RC:$src0, 10057 SrcInfo.KRCWM:$mask, 10058 SrcInfo.RC:$src)>; 10059 10060 def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src), 10061 DestInfo.ImmAllZerosV, 10062 SrcInfo.KRCWM:$mask)), 10063 (!cast<Instruction>(InstrName#"rrkz") SrcInfo.KRCWM:$mask, 10064 SrcInfo.RC:$src)>; 10065} 10066 10067let Predicates = [HasVLX] in { 10068defm : mtrunc_lowering<"VPMOVDWZ256", X86vmtrunc, v8i16x_info, v8i32x_info>; 10069defm : mtrunc_lowering<"VPMOVSDWZ256", X86vmtruncs, v8i16x_info, v8i32x_info>; 10070defm : mtrunc_lowering<"VPMOVUSDWZ256", X86vmtruncus, v8i16x_info, v8i32x_info>; 10071} 10072 10073let Predicates = [HasAVX512] in { 10074defm : mtrunc_lowering<"VPMOVDWZ", X86vmtrunc, v16i16x_info, v16i32_info>; 10075defm : mtrunc_lowering<"VPMOVSDWZ", X86vmtruncs, v16i16x_info, v16i32_info>; 10076defm : mtrunc_lowering<"VPMOVUSDWZ", X86vmtruncus, v16i16x_info, v16i32_info>; 10077 10078defm : mtrunc_lowering<"VPMOVDBZ", X86vmtrunc, v16i8x_info, v16i32_info>; 10079defm : mtrunc_lowering<"VPMOVSDBZ", X86vmtruncs, v16i8x_info, v16i32_info>; 10080defm : mtrunc_lowering<"VPMOVUSDBZ", X86vmtruncus, v16i8x_info, v16i32_info>; 10081 10082defm : mtrunc_lowering<"VPMOVQWZ", X86vmtrunc, v8i16x_info, v8i64_info>; 10083defm : mtrunc_lowering<"VPMOVSQWZ", X86vmtruncs, v8i16x_info, v8i64_info>; 10084defm : mtrunc_lowering<"VPMOVUSQWZ", X86vmtruncus, 
v8i16x_info, v8i64_info>; 10085} 10086 10087multiclass WriteShuffle256_common<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched, 10088 X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo, 10089 X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode>{ 10090 let ExeDomain = DestInfo.ExeDomain in { 10091 defm rr : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst), 10092 (ins SrcInfo.RC:$src), OpcodeStr ,"$src", "$src", 10093 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src)))>, 10094 EVEX, Sched<[sched]>; 10095 10096 defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst), 10097 (ins x86memop:$src), OpcodeStr ,"$src", "$src", 10098 (DestInfo.VT (LdFrag addr:$src))>, 10099 EVEX, Sched<[sched.Folded]>; 10100 } 10101} 10102 10103multiclass WriteShuffle256_BW<bits<8> opc, string OpcodeStr, 10104 SDNode OpNode, SDNode InVecNode, string ExtTy, 10105 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> { 10106 let Predicates = [HasVLX, HasBWI] in { 10107 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v8i16x_info, 10108 v16i8x_info, i64mem, LdFrag, InVecNode>, 10109 EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128, VEX_WIG; 10110 10111 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v16i16x_info, 10112 v16i8x_info, i128mem, LdFrag, OpNode>, 10113 EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256, VEX_WIG; 10114 } 10115 let Predicates = [HasBWI] in { 10116 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v32i16_info, 10117 v32i8x_info, i256mem, LdFrag, OpNode>, 10118 EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512, VEX_WIG; 10119 } 10120} 10121 10122multiclass WriteShuffle256_BD<bits<8> opc, string OpcodeStr, 10123 SDNode OpNode, SDNode InVecNode, string ExtTy, 10124 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> { 10125 let Predicates = [HasVLX, HasAVX512] in { 10126 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v4i32x_info, 10127 v16i8x_info, i32mem, LdFrag, InVecNode>, 
10128 EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128, VEX_WIG; 10129 10130 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info, 10131 v16i8x_info, i64mem, LdFrag, InVecNode>, 10132 EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256, VEX_WIG; 10133 } 10134 let Predicates = [HasAVX512] in { 10135 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v16i32_info, 10136 v16i8x_info, i128mem, LdFrag, OpNode>, 10137 EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512, VEX_WIG; 10138 } 10139} 10140 10141multiclass WriteShuffle256_BQ<bits<8> opc, string OpcodeStr, 10142 SDNode InVecNode, string ExtTy, 10143 X86FoldableSchedWrite sched, 10144 PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> { 10145 let Predicates = [HasVLX, HasAVX512] in { 10146 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info, 10147 v16i8x_info, i16mem, LdFrag, InVecNode>, 10148 EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128, VEX_WIG; 10149 10150 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info, 10151 v16i8x_info, i32mem, LdFrag, InVecNode>, 10152 EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256, VEX_WIG; 10153 } 10154 let Predicates = [HasAVX512] in { 10155 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info, 10156 v16i8x_info, i64mem, LdFrag, InVecNode>, 10157 EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512, VEX_WIG; 10158 } 10159} 10160 10161multiclass WriteShuffle256_WD<bits<8> opc, string OpcodeStr, 10162 SDNode OpNode, SDNode InVecNode, string ExtTy, 10163 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> { 10164 let Predicates = [HasVLX, HasAVX512] in { 10165 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v4i32x_info, 10166 v8i16x_info, i64mem, LdFrag, InVecNode>, 10167 EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128, VEX_WIG; 10168 10169 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info, 10170 v8i16x_info, i128mem, LdFrag, OpNode>, 10171 EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256, VEX_WIG; 10172 } 10173 let Predicates = [HasAVX512] in { 10174 defm Z : 
WriteShuffle256_common<opc, OpcodeStr, sched, v16i32_info, 10175 v16i16x_info, i256mem, LdFrag, OpNode>, 10176 EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512, VEX_WIG; 10177 } 10178} 10179 10180multiclass WriteShuffle256_WQ<bits<8> opc, string OpcodeStr, 10181 SDNode OpNode, SDNode InVecNode, string ExtTy, 10182 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> { 10183 let Predicates = [HasVLX, HasAVX512] in { 10184 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info, 10185 v8i16x_info, i32mem, LdFrag, InVecNode>, 10186 EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128, VEX_WIG; 10187 10188 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info, 10189 v8i16x_info, i64mem, LdFrag, InVecNode>, 10190 EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256, VEX_WIG; 10191 } 10192 let Predicates = [HasAVX512] in { 10193 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info, 10194 v8i16x_info, i128mem, LdFrag, OpNode>, 10195 EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512, VEX_WIG; 10196 } 10197} 10198 10199multiclass WriteShuffle256_DQ<bits<8> opc, string OpcodeStr, 10200 SDNode OpNode, SDNode InVecNode, string ExtTy, 10201 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> { 10202 10203 let Predicates = [HasVLX, HasAVX512] in { 10204 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info, 10205 v4i32x_info, i64mem, LdFrag, InVecNode>, 10206 EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128; 10207 10208 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info, 10209 v4i32x_info, i128mem, LdFrag, OpNode>, 10210 EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256; 10211 } 10212 let Predicates = [HasAVX512] in { 10213 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info, 10214 v8i32x_info, i256mem, LdFrag, OpNode>, 10215 EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512; 10216 } 10217} 10218 10219defm VPMOVZXBW : WriteShuffle256_BW<0x30, "vpmovzxbw", zext, zext_invec, "z", WriteShuffle256>; 10220defm VPMOVZXBD : 
WriteShuffle256_BD<0x31, "vpmovzxbd", zext, zext_invec, "z", WriteShuffle256>; 10221defm VPMOVZXBQ : WriteShuffle256_BQ<0x32, "vpmovzxbq", zext_invec, "z", WriteShuffle256>; 10222defm VPMOVZXWD : WriteShuffle256_WD<0x33, "vpmovzxwd", zext, zext_invec, "z", WriteShuffle256>; 10223defm VPMOVZXWQ : WriteShuffle256_WQ<0x34, "vpmovzxwq", zext, zext_invec, "z", WriteShuffle256>; 10224defm VPMOVZXDQ : WriteShuffle256_DQ<0x35, "vpmovzxdq", zext, zext_invec, "z", WriteShuffle256>; 10225 10226defm VPMOVSXBW: WriteShuffle256_BW<0x20, "vpmovsxbw", sext, sext_invec, "s", WriteShuffle256>; 10227defm VPMOVSXBD: WriteShuffle256_BD<0x21, "vpmovsxbd", sext, sext_invec, "s", WriteShuffle256>; 10228defm VPMOVSXBQ: WriteShuffle256_BQ<0x22, "vpmovsxbq", sext_invec, "s", WriteShuffle256>; 10229defm VPMOVSXWD: WriteShuffle256_WD<0x23, "vpmovsxwd", sext, sext_invec, "s", WriteShuffle256>; 10230defm VPMOVSXWQ: WriteShuffle256_WQ<0x24, "vpmovsxwq", sext, sext_invec, "s", WriteShuffle256>; 10231defm VPMOVSXDQ: WriteShuffle256_DQ<0x25, "vpmovsxdq", sext, sext_invec, "s", WriteShuffle256>; 10232 10233 10234// Patterns that we also need any extend versions of. aext_vector_inreg 10235// is currently legalized to zext_vector_inreg. 
// Load-folding patterns shared by the VPMOVSX and VPMOVZX families.
//   OpcPrefix - instruction name prefix ("VPMOVSX" or "VPMOVZX").
//   ExtOp     - full-vector extend node applied to a complete vector load.
// Each pattern maps an extend of a whole-vector load onto the matching
// register/memory ("rm") instruction.
multiclass AVX512_pmovx_patterns_base<string OpcPrefix, SDNode ExtOp> {
  // 256-bit patterns
  let Predicates = [HasVLX, HasBWI] in {
    def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))),
              (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
  }

  let Predicates = [HasVLX] in {
    def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))),
              (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;

    def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
              (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
  }

  // 512-bit patterns
  let Predicates = [HasBWI] in {
    def : Pat<(v32i16 (ExtOp (loadv32i8 addr:$src))),
              (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
  }
  let Predicates = [HasAVX512] in {
    def : Pat<(v16i32 (ExtOp (loadv16i8 addr:$src))),
              (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;
    def : Pat<(v16i32 (ExtOp (loadv16i16 addr:$src))),
              (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;

    def : Pat<(v8i64 (ExtOp (loadv8i16 addr:$src))),
              (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;

    def : Pat<(v8i64 (ExtOp (loadv8i32 addr:$src))),
              (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
  }
}

// Extends AVX512_pmovx_patterns_base with the patterns whose memory source is
// a 32- or 64-bit scalar load rather than a full vector load.
//   InVecOp - the "*_invec" extend node applied to the bitcast source vector.
// The scalar reaches the vector either through scalar_to_vector or through an
// X86vzload. The vector type written around scalar_to_vector has to agree
// with the scalar load that feeds it: loadi32 -> v4i32, loadi64 -> v2i64,
// loadf64 -> v2f64.
multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
                                 SDNode InVecOp> :
    AVX512_pmovx_patterns_base<OpcPrefix, ExtOp> {
  // 128-bit patterns
  let Predicates = [HasVLX, HasBWI] in {
    def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
              (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
    def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
              (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
    def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
              (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  }
  let Predicates = [HasVLX] in {
    def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
              (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
    def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
              (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;

    def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
              (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;

    def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
              (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
    def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
              (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
    def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
              (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;

    def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
              (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
    def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (X86vzload32 addr:$src))))),
              (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;

    def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
              (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
    def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
              (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
    def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
              (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  }
  // 256-bit patterns
  let Predicates = [HasVLX] in {
    def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
              (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
    // An f64 scalar load yields v2f64 (not v2i64) once placed in a vector,
    // matching the 128-bit and 512-bit f64 patterns in this multiclass.
    def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
              (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
    def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
              (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;

    def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
              (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
    def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
              (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;

    def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
              (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
    // Same f64 -> v2f64 pairing as above.
    def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
              (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
    def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
              (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  }
  // 512-bit patterns
  let Predicates = [HasAVX512] in {
    def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
              (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
    def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
              (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
    def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
              (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
  }
}

defm : AVX512_pmovx_patterns<"VPMOVSX", sext, sext_invec>;
defm : AVX512_pmovx_patterns<"VPMOVZX", zext, zext_invec>;

// Without BWI we can't do a trunc from v16i16 to v16i8. DAG combine can merge
// ext+trunc aggressively making it impossible to legalize the DAG to this
// pattern directly.
// The workaround widens the words to dwords with VPMOVZXWD and then narrows
// the dwords to bytes with VPMOVDB.
let Predicates = [HasAVX512, NoBWI] in {
def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
         (VPMOVDBZrr (v16i32 (VPMOVZXWDZrr VR256X:$src)))>;
def: Pat<(v16i8 (trunc (loadv16i16 addr:$src))),
         (VPMOVDBZrr (v16i32 (VPMOVZXWDZrm addr:$src)))>;
}

//===----------------------------------------------------------------------===//
// GATHER - SCATTER Operations

// FIXME: Improve scheduling of gather/scatter instructions.
// One masked gather instruction ("rm" form).
// Per the constraint string, $dst is early-clobber and tied to the $src1
// input, and the mask operand is tied to the $mask_wb output. No selection
// pattern is attached.
multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> {
  let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
      ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in {
    def rm : AVX5128I<opc, MRMSrcMem,
                      (outs _.RC:$dst, MaskRC:$mask_wb),
                      (ins _.RC:$src1, MaskRC:$mask, memop:$src2),
                      OpcodeStr # _.Suffix #
                        "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}",
                      []>,
             EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
             Sched<[WriteLoad, WriteVecMaskedGatherWriteback]>;
  }
}

// Gather family carrying VEX_W (used below with the f64 / i64 VT infos).
// dopc / qopc are the opcodes of the two variants, whose mnemonics get "d" and
// "q" appended. The 512-bit forms are unconditional here; the 256/128-bit
// forms require VLX.
multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
                              AVX512VLVectorVTInfo _, string OpcodeStr,
                              string SUFF> {
  defm NAME#D#SUFF#Z
      : avx512_gather<dopc, OpcodeStr#"d", _.info512, vy512xmem>,
        EVEX_V512, VEX_W;
  defm NAME#Q#SUFF#Z
      : avx512_gather<qopc, OpcodeStr#"q", _.info512, vz512mem>,
        EVEX_V512, VEX_W;

  let Predicates = [HasVLX] in {
    defm NAME#D#SUFF#Z256
        : avx512_gather<dopc, OpcodeStr#"d", _.info256, vx256xmem>,
          EVEX_V256, VEX_W;
    defm NAME#Q#SUFF#Z256
        : avx512_gather<qopc, OpcodeStr#"q", _.info256, vy256xmem>,
          EVEX_V256, VEX_W;
    defm NAME#D#SUFF#Z128
        : avx512_gather<dopc, OpcodeStr#"d", _.info128, vx128xmem>,
          EVEX_V128, VEX_W;
    defm NAME#Q#SUFF#Z128
        : avx512_gather<qopc, OpcodeStr#"q", _.info128, vx128xmem>,
          EVEX_V128, VEX_W;
  }
}

// Gather family without VEX_W (used below with the f32 / i32 VT infos).
// The "q" variants use the next narrower vector info than their EVEX length
// (info256 under EVEX_V512, info128 under EVEX_V256); the 128-bit "q" variant
// additionally overrides the mask register class with VK2WM.
multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
                              AVX512VLVectorVTInfo _, string OpcodeStr,
                              string SUFF> {
  defm NAME#D#SUFF#Z
      : avx512_gather<dopc, OpcodeStr#"d", _.info512, vz512mem>,
        EVEX_V512;
  defm NAME#Q#SUFF#Z
      : avx512_gather<qopc, OpcodeStr#"q", _.info256, vz256mem>,
        EVEX_V512;

  let Predicates = [HasVLX] in {
    defm NAME#D#SUFF#Z256
        : avx512_gather<dopc, OpcodeStr#"d", _.info256, vy256xmem>,
          EVEX_V256;
    defm NAME#Q#SUFF#Z256
        : avx512_gather<qopc, OpcodeStr#"q", _.info128, vy128xmem>,
          EVEX_V256;
    defm NAME#D#SUFF#Z128
        : avx512_gather<dopc, OpcodeStr#"d", _.info128, vx128xmem>,
          EVEX_V128;
    defm NAME#Q#SUFF#Z128
        : avx512_gather<qopc, OpcodeStr#"q", _.info128, vx64xmem, VK2WM>,
          EVEX_V128;
  }
}


defm VGATHER
    : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">,
      avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">;

defm VPGATHER
    : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">,
      avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;

// One masked scatter instruction ("mr" form). The mask operand is tied to the
// $mask_wb output; there is no vector result and no selection pattern.
multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                          X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> {
  let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain,
      hasSideEffects = 0 in {
    def mr : AVX5128I<opc, MRMDestMem,
                      (outs MaskRC:$mask_wb),
                      (ins memop:$dst, MaskRC:$mask, _.RC:$src),
                      OpcodeStr # _.Suffix #
                        "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
                      []>,
             EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
             Sched<[WriteStore]>;
  }
}

// Scatter family carrying VEX_W; operand choices parallel avx512_gather_q_pd.
multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
                               AVX512VLVectorVTInfo _, string OpcodeStr,
                               string SUFF> {
  defm NAME#D#SUFF#Z
      : avx512_scatter<dopc, OpcodeStr#"d", _.info512, vy512xmem>,
        EVEX_V512, VEX_W;
  defm NAME#Q#SUFF#Z
      : avx512_scatter<qopc, OpcodeStr#"q", _.info512, vz512mem>,
        EVEX_V512, VEX_W;

  let Predicates = [HasVLX] in {
    defm NAME#D#SUFF#Z256
        : avx512_scatter<dopc, OpcodeStr#"d", _.info256, vx256xmem>,
          EVEX_V256, VEX_W;
    defm NAME#Q#SUFF#Z256
        : avx512_scatter<qopc, OpcodeStr#"q", _.info256, vy256xmem>,
          EVEX_V256, VEX_W;
    defm NAME#D#SUFF#Z128
        : avx512_scatter<dopc, OpcodeStr#"d", _.info128, vx128xmem>,
          EVEX_V128, VEX_W;
    defm NAME#Q#SUFF#Z128
        : avx512_scatter<qopc, OpcodeStr#"q", _.info128, vx128xmem>,
          EVEX_V128, VEX_W;
  }
}

// Scatter family without VEX_W; operand choices parallel avx512_gather_d_ps.
multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
                               AVX512VLVectorVTInfo _, string OpcodeStr,
                               string SUFF> {
  defm NAME#D#SUFF#Z
      : avx512_scatter<dopc, OpcodeStr#"d", _.info512, vz512mem>,
        EVEX_V512;
  defm NAME#Q#SUFF#Z
      : avx512_scatter<qopc, OpcodeStr#"q", _.info256, vz256mem>,
        EVEX_V512;

  let Predicates = [HasVLX] in {
    defm NAME#D#SUFF#Z256
        : avx512_scatter<dopc, OpcodeStr#"d", _.info256, vy256xmem>,
          EVEX_V256;
    defm NAME#Q#SUFF#Z256
        : avx512_scatter<qopc, OpcodeStr#"q", _.info128, vy128xmem>,
          EVEX_V256;
    defm NAME#D#SUFF#Z128
        : avx512_scatter<dopc, OpcodeStr#"d", _.info128, vx128xmem>,
          EVEX_V128;
    defm NAME#Q#SUFF#Z128
        : avx512_scatter<qopc, OpcodeStr#"q", _.info128, vx64xmem, VK2WM>,
          EVEX_V128;
  }
}

defm VSCATTER
    : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">,
      avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">;

defm VPSCATTER
    : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">,
      avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">;

// prefetch
// Shared shape of the gather/scatter prefetch instructions: a mask register
// plus a vector-indexed memory operand, no outputs, no pattern. Gated on
// HasPFI and flagged as both mayLoad and mayStore.
multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F,
                                          string OpcodeStr, RegisterClass KRC,
                                          X86MemOperand memop> {
  let Predicates = [HasPFI], mayLoad = 1, mayStore = 1 in {
    def m : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
                     OpcodeStr # "\t{$src {${mask}}|{${mask}}, $src}", []>,
            EVEX, EVEX_K, Sched<[WriteLoad]>;
  }
}

// Gather prefetches with the MRM1m format.
defm VGATHERPF0DPS
    : avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
                                     VK16WM, vz512mem>,
      EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF0QPS
    : avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
                                     VK8WM, vz256mem>,
      EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF0DPD
    : avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
                                     VK8WM, vy512xmem>,
      EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF0QPD
    : avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
                                     VK8WM, vz512mem>,
      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

// Gather prefetches with the MRM2m format.
defm VGATHERPF1DPS
    : avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
                                     VK16WM, vz512mem>,
      EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF1QPS
    : avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
                                     VK8WM, vz256mem>,
      EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF1DPD
    : avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
                                     VK8WM, vy512xmem>,
      EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF1QPD
    : avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
                                     VK8WM, vz512mem>,
      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

// Scatter prefetches with the MRM5m format.
defm VSCATTERPF0DPS
    : avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
                                     VK16WM, vz512mem>,
      EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF0QPS
    : avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
                                     VK8WM, vz256mem>,
      EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF0DPD
    : avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
                                     VK8WM, vy512xmem>,
      EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF0QPD
    : avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
                                     VK8WM, vz512mem>,
      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

// Scatter prefetches with the MRM6m format.
defm VSCATTERPF1DPS
    : avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
                                     VK16WM, vz512mem>,
      EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF1QPS
    : avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
                                     VK8WM, vz256mem>,
      EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF1DPD
    : avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
                                     VK8WM, vy512xmem>,
      EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF1QPD
    : avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
                                     VK8WM, vz512mem>,
      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

// Mask-to-vector move for one vector width: selected for a sign extension of
// the mask register into the vector type.
multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr > {
  def rr : AVX512XS8I<opc, MRMSrcReg,
                      (outs Vec.RC:$dst), (ins Vec.KRC:$src),
                      OpcodeStr # Vec.Suffix # "\t{$src, $dst|$dst, $src}",
                      [(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))]>,
           EVEX, Sched<[WriteMove]>; // TODO - WriteVecTrunc?
}

// Instantiates cvt_by_vec_width at 512 bits under `prd`, and at 256/128 bits
// under `prd` plus VLX.
multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
                                 string OpcodeStr, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr>, EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr>, EVEX_V256;
    defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr>, EVEX_V128;
  }
}

defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info,  "vpmovm2", HasBWI>;
defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI>,
                VEX_W;
defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>;
defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI>,
                VEX_W;

// Vector-to-mask move for one vector width: selected for the signed compare
// "0 > src" (X86pcmpgtm with an all-zeros first operand).
multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr > {
  def rr : AVX512XS8I<opc, MRMSrcReg,
                      (outs _.KRC:$dst), (ins _.RC:$src),
                      OpcodeStr # "\t{$src, $dst|$dst, $src}",
                      [(set _.KRC:$dst,
                            (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src)))]>,
           EVEX, Sched<[WriteMove]>;
}

// Use 512bit version to implement 128/256 bit in case NoVLX.
// Selects the narrow vector-to-mask compare through the 512-bit instruction:
// the source is inserted into an undefined ExtendInfo-typed register at the
// narrow type's subregister index, Name#"Zrr" is applied to it, and the result
// is copied into the narrow type's mask register class.
multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo,
                                           X86VectorVTInfo _,
                                           string Name> {
  def : Pat<(_.KVT (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src))),
            (_.KVT
              (COPY_TO_REGCLASS
                (!cast<Instruction>(Name#"Zrr")
                  (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                                 _.RC:$src, _.SubRegIdx)),
                _.KRC))>;
}

// Full vector-to-mask family for one element type:
//   - Z         : 512-bit instruction, needs `prd`;
//   - Z256/Z128 : native narrow instructions, need `prd` and VLX;
//   - *_Alt     : pattern-only fallbacks through the 512-bit instruction when
//                 VLX is not available.
multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
                                         AVX512VLVectorVTInfo VTInfo,
                                         Predicate prd> {
  let Predicates = [prd] in {
    defm Z : convert_vector_to_mask_common<opc, VTInfo.info512, OpcodeStr>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256, OpcodeStr>,
                EVEX_V256;
    defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,
                EVEX_V128;
  }

  let Predicates = [prd, NoVLX] in {
    defm Z256_Alt
        : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info256, NAME>;
    defm Z128_Alt
        : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info128, NAME>;
  }
}

defm VPMOVB2M
    : avx512_convert_vector_to_mask<0x29, "vpmovb2m", avx512vl_i8_info, HasBWI>;
defm VPMOVW2M
    : avx512_convert_vector_to_mask<0x29, "vpmovw2m", avx512vl_i16_info, HasBWI>,
      VEX_W;
defm VPMOVD2M
    : avx512_convert_vector_to_mask<0x39, "vpmovd2m", avx512vl_i32_info, HasDQI>;
defm VPMOVQ2M
    : avx512_convert_vector_to_mask<0x39, "vpmovq2m", avx512vl_i64_info, HasDQI>,
      VEX_W;

// Patterns for handling sext from a mask register to v16i8/v16i16 when DQI
// is available, but BWI is not. We can't handle this in lowering because
// a target independent DAG combine likes to combine sext and trunc.
// Each pattern first materializes the mask as a dword vector with VPMOVM2D
// and then narrows the dwords with VPMOVDB / VPMOVDW.
let Predicates = [HasDQI, NoBWI] in {
  def : Pat<(v16i8 (sext (v16i1 VK16:$src))),
            (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
  def : Pat<(v16i16 (sext (v16i1 VK16:$src))),
            (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
}

let Predicates = [HasDQI, NoBWI, HasVLX] in {
  def : Pat<(v8i16 (sext (v8i1 VK8:$src))),
            (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - COMPRESS and EXPAND
//

// Instruction definitions for one vector width of a compress operation:
//   rr  - register form, built through AVX512_maskable with null_frag;
//   mr  - store form without a mask operand;
//   mrk - store form with a mask operand.
// None of them carries a selection pattern here.
multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
                                        string OpcodeStr,
                                        X86FoldableSchedWrite sched> {
  defm rr : AVX512_maskable<opc, MRMDestReg, _,
                            (outs _.RC:$dst), (ins _.RC:$src1),
                            OpcodeStr, "$src1", "$src1",
                            (null_frag)>,
            AVX5128IBase, Sched<[sched]>;

  // The explicit flags are scoped to `mr` only; `mrk` below is outside this
  // let.
  let mayStore = 1, hasSideEffects = 0 in {
    def mr : AVX5128I<opc, MRMDestMem, (outs),
                      (ins _.MemOp:$dst, _.RC:$src),
                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                      []>,
             EVEX_CD8<_.EltSize, CD8VT1>, Sched<[sched.Folded]>;
  }

  def mrk : AVX5128I<opc, MRMDestMem, (outs),
                     (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
                     !strconcat(OpcodeStr,
                       "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
                     []>,
            EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[sched.Folded]>;
}

// Selection patterns for one vector width of a compress operation. Name is
// the instruction family name; the width suffix comes from _.ZSuffix.
multiclass compress_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
  // Compressing store -> masked store form.
  def : Pat<(X86mCompressingStore (_.VT _.RC:$src), addr:$dst, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#mrk)
              addr:$dst, _.KRCWM:$mask, _.RC:$src)>;

  // Register compress with an explicit pass-through value -> merge-masked
  // register form.
  def : Pat<(X86compress (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#rrk)
              _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;

  // Register compress with an all-zeros pass-through -> zero-masked register
  // form.
  def : Pat<(X86compress (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#rrkz)
              _.KRCWM:$mask, _.RC:$src)>;
}

// Instructions plus patterns for a compress operation at all three vector
// widths: 512-bit under Pred, 256/128-bit under Pred and VLX.
multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched,
                                 AVX512VLVectorVTInfo VTInfo,
                                 Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in {
    defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr, sched>,
             compress_by_vec_width_lowering<VTInfo.info512, NAME>,
             EVEX_V512;
  }

  let Predicates = [Pred, HasVLX] in {
    defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr, sched>,
                compress_by_vec_width_lowering<VTInfo.info256, NAME>,
                EVEX_V256;
    defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr, sched>,
                compress_by_vec_width_lowering<VTInfo.info128, NAME>,
                EVEX_V128;
  }
}

// FIXME: Is there a better scheduler class for VPCOMPRESS?
defm VPCOMPRESSD
    : compress_by_elt_width<0x8B, "vpcompressd", WriteVarShuffle256,
                            avx512vl_i32_info>,
      EVEX, NotMemoryFoldable;
defm VPCOMPRESSQ
    : compress_by_elt_width<0x8B, "vpcompressq", WriteVarShuffle256,
                            avx512vl_i64_info>,
      EVEX, VEX_W, NotMemoryFoldable;
defm VCOMPRESSPS
    : compress_by_elt_width<0x8A, "vcompressps", WriteVarShuffle256,
                            avx512vl_f32_info>,
      EVEX, NotMemoryFoldable;
defm VCOMPRESSPD
    : compress_by_elt_width<0x8A, "vcompresspd", WriteVarShuffle256,
                            avx512vl_f64_info>,
      EVEX, VEX_W, NotMemoryFoldable;

// expand
// Instruction definitions for one vector width of an expand operation: a
// register-source form (rr) and a memory-source form (rm). Both are built
// through AVX512_maskable with null_frag.
multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
                               string OpcodeStr, X86FoldableSchedWrite sched> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _,
                            (outs _.RC:$dst), (ins _.RC:$src1),
                            OpcodeStr, "$src1", "$src1",
                            (null_frag)>,
            AVX5128IBase, Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _,
                            (outs _.RC:$dst), (ins _.MemOp:$src1),
                            OpcodeStr, "$src1", "$src1",
                            (null_frag)>,
            AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>,
            Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// Selection patterns for one vector width of an expand operation. Name is the
// instruction family name; the width suffix comes from _.ZSuffix.
multiclass expand_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
  // Expanding load whose pass-through is undef or all zeros -> zero-masked
  // memory form.
  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)),
            (!cast<Instruction>(Name#_.ZSuffix#rmkz)
              _.KRCWM:$mask, addr:$src)>;

  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmkz)
              _.KRCWM:$mask, addr:$src)>;

  // Expanding load with a register pass-through -> merge-masked memory form.
  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask,
                                     (_.VT _.RC:$src0))),
            (!cast<Instruction>(Name#_.ZSuffix#rmk)
              _.RC:$src0, _.KRCWM:$mask, addr:$src)>;

  // Register expand: merge-masked, then zero-masked.
  def : Pat<(X86expand (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#rrk)
              _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
  def : Pat<(X86expand (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#rrkz)
              _.KRCWM:$mask, _.RC:$src)>;
}

// Instructions plus patterns for an expand operation at all three vector
// widths: 512-bit under Pred, 256/128-bit under Pred and VLX.
multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
                               X86FoldableSchedWrite sched,
                               AVX512VLVectorVTInfo VTInfo,
                               Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in {
    defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr, sched>,
             expand_by_vec_width_lowering<VTInfo.info512, NAME>,
             EVEX_V512;
  }

  let Predicates = [Pred, HasVLX] in {
    defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr, sched>,
                expand_by_vec_width_lowering<VTInfo.info256, NAME>,
                EVEX_V256;
    defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr, sched>,
                expand_by_vec_width_lowering<VTInfo.info128, NAME>,
                EVEX_V128;
  }
}

// FIXME: Is there a better scheduler class for VPEXPAND?
10731defm VPEXPANDD : expand_by_elt_width <0x89, "vpexpandd", WriteVarShuffle256, 10732 avx512vl_i32_info>, EVEX; 10733defm VPEXPANDQ : expand_by_elt_width <0x89, "vpexpandq", WriteVarShuffle256, 10734 avx512vl_i64_info>, EVEX, VEX_W; 10735defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", WriteVarShuffle256, 10736 avx512vl_f32_info>, EVEX; 10737defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", WriteVarShuffle256, 10738 avx512vl_f64_info>, EVEX, VEX_W; 10739 10740//handle instruction reg_vec1 = op(reg_vec,imm) 10741// op(mem_vec,imm) 10742// op(broadcast(eltVt),imm) 10743//all instruction created with FROUND_CURRENT 10744multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr, 10745 SDPatternOperator OpNode, 10746 SDPatternOperator MaskOpNode, 10747 X86FoldableSchedWrite sched, 10748 X86VectorVTInfo _> { 10749 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in { 10750 defm rri : AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst), 10751 (ins _.RC:$src1, i32u8imm:$src2), 10752 OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2", 10753 (OpNode (_.VT _.RC:$src1), (i32 timm:$src2)), 10754 (MaskOpNode (_.VT _.RC:$src1), (i32 timm:$src2))>, 10755 Sched<[sched]>; 10756 defm rmi : AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst), 10757 (ins _.MemOp:$src1, i32u8imm:$src2), 10758 OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2", 10759 (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))), 10760 (i32 timm:$src2)), 10761 (MaskOpNode (_.VT (bitconvert (_.LdFrag addr:$src1))), 10762 (i32 timm:$src2))>, 10763 Sched<[sched.Folded, sched.ReadAfterFold]>; 10764 defm rmbi : AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst), 10765 (ins _.ScalarMemOp:$src1, i32u8imm:$src2), 10766 OpcodeStr#_.Suffix, "$src2, ${src1}"#_.BroadcastStr, 10767 "${src1}"#_.BroadcastStr#", $src2", 10768 (OpNode (_.VT (_.BroadcastLdFrag addr:$src1)), 10769 (i32 timm:$src2)), 10770 (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src1)), 10771 (i32 
timm:$src2))>, EVEX_B, 10772 Sched<[sched.Folded, sched.ReadAfterFold]>; 10773 } 10774} 10775 10776//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae} 10777multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr, 10778 SDNode OpNode, X86FoldableSchedWrite sched, 10779 X86VectorVTInfo _> { 10780 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in 10781 defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 10782 (ins _.RC:$src1, i32u8imm:$src2), 10783 OpcodeStr#_.Suffix, "$src2, {sae}, $src1", 10784 "$src1, {sae}, $src2", 10785 (OpNode (_.VT _.RC:$src1), 10786 (i32 timm:$src2))>, 10787 EVEX_B, Sched<[sched]>; 10788} 10789 10790multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr, 10791 AVX512VLVectorVTInfo _, bits<8> opc, SDPatternOperator OpNode, 10792 SDPatternOperator MaskOpNode, SDNode OpNodeSAE, X86SchedWriteWidths sched, 10793 Predicate prd>{ 10794 let Predicates = [prd] in { 10795 defm Z : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode, 10796 sched.ZMM, _.info512>, 10797 avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE, 10798 sched.ZMM, _.info512>, EVEX_V512; 10799 } 10800 let Predicates = [prd, HasVLX] in { 10801 defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode, 10802 sched.XMM, _.info128>, EVEX_V128; 10803 defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode, 10804 sched.YMM, _.info256>, EVEX_V256; 10805 } 10806} 10807 10808//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm) 10809// op(reg_vec2,mem_vec,imm) 10810// op(reg_vec2,broadcast(eltVt),imm) 10811//all instruction created with FROUND_CURRENT 10812multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode, 10813 X86FoldableSchedWrite sched, X86VectorVTInfo _>{ 10814 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in { 10815 defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 10816 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), 10817 
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", 10818 (OpNode (_.VT _.RC:$src1), 10819 (_.VT _.RC:$src2), 10820 (i32 timm:$src3))>, 10821 Sched<[sched]>; 10822 defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 10823 (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3), 10824 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", 10825 (OpNode (_.VT _.RC:$src1), 10826 (_.VT (bitconvert (_.LdFrag addr:$src2))), 10827 (i32 timm:$src3))>, 10828 Sched<[sched.Folded, sched.ReadAfterFold]>; 10829 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 10830 (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3), 10831 OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1", 10832 "$src1, ${src2}"#_.BroadcastStr#", $src3", 10833 (OpNode (_.VT _.RC:$src1), 10834 (_.VT (_.BroadcastLdFrag addr:$src2)), 10835 (i32 timm:$src3))>, EVEX_B, 10836 Sched<[sched.Folded, sched.ReadAfterFold]>; 10837 } 10838} 10839 10840//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm) 10841// op(reg_vec2,mem_vec,imm) 10842multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode, 10843 X86FoldableSchedWrite sched, X86VectorVTInfo DestInfo, 10844 X86VectorVTInfo SrcInfo>{ 10845 let ExeDomain = DestInfo.ExeDomain in { 10846 defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst), 10847 (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3), 10848 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", 10849 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1), 10850 (SrcInfo.VT SrcInfo.RC:$src2), 10851 (i8 timm:$src3)))>, 10852 Sched<[sched]>; 10853 defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst), 10854 (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3), 10855 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", 10856 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1), 10857 (SrcInfo.VT (bitconvert 10858 (SrcInfo.LdFrag addr:$src2))), 10859 (i8 timm:$src3)))>, 10860 Sched<[sched.Folded, 
sched.ReadAfterFold]>; 10861 } 10862} 10863 10864//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm) 10865// op(reg_vec2,mem_vec,imm) 10866// op(reg_vec2,broadcast(eltVt),imm) 10867multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode, 10868 X86FoldableSchedWrite sched, X86VectorVTInfo _>: 10869 avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, sched, _, _>{ 10870 10871 let ExeDomain = _.ExeDomain in 10872 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 10873 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3), 10874 OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1", 10875 "$src1, ${src2}"#_.BroadcastStr#", $src3", 10876 (OpNode (_.VT _.RC:$src1), 10877 (_.VT (_.BroadcastLdFrag addr:$src2)), 10878 (i8 timm:$src3))>, EVEX_B, 10879 Sched<[sched.Folded, sched.ReadAfterFold]>; 10880} 10881 10882//handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm) 10883// op(reg_vec2,mem_scalar,imm) 10884multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode, 10885 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 10886 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in { 10887 defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 10888 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), 10889 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", 10890 (OpNode (_.VT _.RC:$src1), 10891 (_.VT _.RC:$src2), 10892 (i32 timm:$src3))>, 10893 Sched<[sched]>; 10894 defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), 10895 (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3), 10896 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", 10897 (OpNode (_.VT _.RC:$src1), 10898 (_.ScalarIntMemFrags addr:$src2), 10899 (i32 timm:$src3))>, 10900 Sched<[sched.Folded, sched.ReadAfterFold]>; 10901 } 10902} 10903 10904//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae} 10905multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr, 10906 SDNode OpNode, 
X86FoldableSchedWrite sched, 10907 X86VectorVTInfo _> { 10908 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in 10909 defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 10910 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), 10911 OpcodeStr, "$src3, {sae}, $src2, $src1", 10912 "$src1, $src2, {sae}, $src3", 10913 (OpNode (_.VT _.RC:$src1), 10914 (_.VT _.RC:$src2), 10915 (i32 timm:$src3))>, 10916 EVEX_B, Sched<[sched]>; 10917} 10918 10919//handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae} 10920multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode, 10921 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 10922 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in 10923 defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 10924 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), 10925 OpcodeStr, "$src3, {sae}, $src2, $src1", 10926 "$src1, $src2, {sae}, $src3", 10927 (OpNode (_.VT _.RC:$src1), 10928 (_.VT _.RC:$src2), 10929 (i32 timm:$src3))>, 10930 EVEX_B, Sched<[sched]>; 10931} 10932 10933multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr, 10934 AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode, 10935 SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{ 10936 let Predicates = [prd] in { 10937 defm Z : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, 10938 avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE, sched.ZMM, _.info512>, 10939 EVEX_V512; 10940 10941 } 10942 let Predicates = [prd, HasVLX] in { 10943 defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, 10944 EVEX_V128; 10945 defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, 10946 EVEX_V256; 10947 } 10948} 10949 10950multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr, 10951 X86SchedWriteWidths sched, AVX512VLVectorVTInfo DestInfo, 10952 AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> { 10953 let Predicates = [Pred] in { 10954 
defm Z : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.ZMM, DestInfo.info512, 10955 SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX_4V; 10956 } 10957 let Predicates = [Pred, HasVLX] in { 10958 defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.XMM, DestInfo.info128, 10959 SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX_4V; 10960 defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.YMM, DestInfo.info256, 10961 SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX_4V; 10962 } 10963} 10964 10965multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _, 10966 bits<8> opc, SDNode OpNode, X86SchedWriteWidths sched, 10967 Predicate Pred = HasAVX512> { 10968 let Predicates = [Pred] in { 10969 defm Z : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, 10970 EVEX_V512; 10971 } 10972 let Predicates = [Pred, HasVLX] in { 10973 defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, 10974 EVEX_V128; 10975 defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, 10976 EVEX_V256; 10977 } 10978} 10979 10980multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr, 10981 X86VectorVTInfo _, bits<8> opc, SDNode OpNode, 10982 SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd> { 10983 let Predicates = [prd] in { 10984 defm Z : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, sched.XMM, _>, 10985 avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeSAE, sched.XMM, _>; 10986 } 10987} 10988 10989multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr, 10990 bits<8> opcPs, bits<8> opcPd, SDPatternOperator OpNode, 10991 SDPatternOperator MaskOpNode, SDNode OpNodeSAE, 10992 X86SchedWriteWidths sched, Predicate prd>{ 10993 defm PH : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f16_info, 10994 opcPs, OpNode, MaskOpNode, OpNodeSAE, sched, HasFP16>, 10995 AVX512PSIi8Base, TA, EVEX, EVEX_CD8<16, CD8VF>; 10996 defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info, 10997 
opcPs, OpNode, MaskOpNode, OpNodeSAE, sched, prd>, 10998 AVX512AIi8Base, EVEX, EVEX_CD8<32, CD8VF>; 10999 defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info, 11000 opcPd, OpNode, MaskOpNode, OpNodeSAE, sched, prd>, 11001 AVX512AIi8Base, EVEX, EVEX_CD8<64, CD8VF>, VEX_W; 11002} 11003 11004defm VREDUCE : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56, 11005 X86VReduce, X86VReduce, X86VReduceSAE, 11006 SchedWriteFRnd, HasDQI>; 11007defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09, 11008 X86any_VRndScale, X86VRndScale, X86VRndScaleSAE, 11009 SchedWriteFRnd, HasAVX512>; 11010defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26, 11011 X86VGetMant, X86VGetMant, X86VGetMantSAE, 11012 SchedWriteFRnd, HasAVX512>; 11013 11014defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info, 11015 0x50, X86VRange, X86VRangeSAE, 11016 SchedWriteFAdd, HasDQI>, 11017 AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; 11018defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info, 11019 0x50, X86VRange, X86VRangeSAE, 11020 SchedWriteFAdd, HasDQI>, 11021 AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; 11022 11023defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd", 11024 f64x_info, 0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>, 11025 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W; 11026defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info, 11027 0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>, 11028 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>; 11029 11030defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info, 11031 0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>, 11032 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W; 11033defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info, 11034 0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, 
HasDQI>, 11035 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>; 11036defm VREDUCESH: avx512_common_fp_sae_scalar_imm<"vreducesh", f16x_info, 11037 0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasFP16>, 11038 AVX512PSIi8Base, TA, VEX_LIG, EVEX_4V, EVEX_CD8<16, CD8VT1>; 11039 11040defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info, 11041 0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>, 11042 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W; 11043defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info, 11044 0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>, 11045 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>; 11046defm VGETMANTSH: avx512_common_fp_sae_scalar_imm<"vgetmantsh", f16x_info, 11047 0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasFP16>, 11048 AVX512PSIi8Base, TA, VEX_LIG, EVEX_4V, EVEX_CD8<16, CD8VT1>; 11049 11050multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr, 11051 X86FoldableSchedWrite sched, 11052 X86VectorVTInfo _, 11053 X86VectorVTInfo CastInfo, 11054 string EVEX2VEXOvrd> { 11055 let ExeDomain = _.ExeDomain in { 11056 defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 11057 (ins _.RC:$src1, _.RC:$src2, u8imm:$src3), 11058 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", 11059 (_.VT (bitconvert 11060 (CastInfo.VT (X86Shuf128 _.RC:$src1, _.RC:$src2, 11061 (i8 timm:$src3)))))>, 11062 Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr">; 11063 defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 11064 (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3), 11065 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", 11066 (_.VT 11067 (bitconvert 11068 (CastInfo.VT (X86Shuf128 _.RC:$src1, 11069 (CastInfo.LdFrag addr:$src2), 11070 (i8 timm:$src3)))))>, 11071 Sched<[sched.Folded, sched.ReadAfterFold]>, 11072 EVEX2VEXOverride<EVEX2VEXOvrd#"rm">; 11073 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs 
_.RC:$dst), 11074 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3), 11075 OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1", 11076 "$src1, ${src2}"#_.BroadcastStr#", $src3", 11077 (_.VT 11078 (bitconvert 11079 (CastInfo.VT 11080 (X86Shuf128 _.RC:$src1, 11081 (_.BroadcastLdFrag addr:$src2), 11082 (i8 timm:$src3)))))>, EVEX_B, 11083 Sched<[sched.Folded, sched.ReadAfterFold]>; 11084 } 11085} 11086 11087multiclass avx512_shuff_packed_128<string OpcodeStr, X86FoldableSchedWrite sched, 11088 AVX512VLVectorVTInfo _, 11089 AVX512VLVectorVTInfo CastInfo, bits<8> opc, 11090 string EVEX2VEXOvrd>{ 11091 let Predicates = [HasAVX512] in 11092 defm Z : avx512_shuff_packed_128_common<opc, OpcodeStr, sched, 11093 _.info512, CastInfo.info512, "">, EVEX_V512; 11094 11095 let Predicates = [HasAVX512, HasVLX] in 11096 defm Z256 : avx512_shuff_packed_128_common<opc, OpcodeStr, sched, 11097 _.info256, CastInfo.info256, 11098 EVEX2VEXOvrd>, EVEX_V256; 11099} 11100 11101defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", WriteFShuffle256, 11102 avx512vl_f32_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; 11103defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", WriteFShuffle256, 11104 avx512vl_f64_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; 11105defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", WriteFShuffle256, 11106 avx512vl_i32_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; 11107defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", WriteFShuffle256, 11108 avx512vl_i64_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; 11109 11110multiclass avx512_valign<bits<8> opc, string OpcodeStr, 11111 X86FoldableSchedWrite sched, X86VectorVTInfo _>{ 11112 // NOTE: EVEX2VEXOverride changed back to Unset for 256-bit at the 11113 // instantiation of this class. 
11114 let ExeDomain = _.ExeDomain in { 11115 defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 11116 (ins _.RC:$src1, _.RC:$src2, u8imm:$src3), 11117 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", 11118 (_.VT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 timm:$src3)))>, 11119 Sched<[sched]>, EVEX2VEXOverride<"VPALIGNRrri">; 11120 defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 11121 (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3), 11122 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", 11123 (_.VT (X86VAlign _.RC:$src1, 11124 (bitconvert (_.LdFrag addr:$src2)), 11125 (i8 timm:$src3)))>, 11126 Sched<[sched.Folded, sched.ReadAfterFold]>, 11127 EVEX2VEXOverride<"VPALIGNRrmi">; 11128 11129 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 11130 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3), 11131 OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1", 11132 "$src1, ${src2}"#_.BroadcastStr#", $src3", 11133 (X86VAlign _.RC:$src1, 11134 (_.VT (_.BroadcastLdFrag addr:$src2)), 11135 (i8 timm:$src3))>, EVEX_B, 11136 Sched<[sched.Folded, sched.ReadAfterFold]>; 11137 } 11138} 11139 11140multiclass avx512_valign_common<string OpcodeStr, X86SchedWriteWidths sched, 11141 AVX512VLVectorVTInfo _> { 11142 let Predicates = [HasAVX512] in { 11143 defm Z : avx512_valign<0x03, OpcodeStr, sched.ZMM, _.info512>, 11144 AVX512AIi8Base, EVEX_4V, EVEX_V512; 11145 } 11146 let Predicates = [HasAVX512, HasVLX] in { 11147 defm Z128 : avx512_valign<0x03, OpcodeStr, sched.XMM, _.info128>, 11148 AVX512AIi8Base, EVEX_4V, EVEX_V128; 11149 // We can't really override the 256-bit version so change it back to unset. 11150 let EVEX2VEXOverride = ? 
in 11151 defm Z256 : avx512_valign<0x03, OpcodeStr, sched.YMM, _.info256>, 11152 AVX512AIi8Base, EVEX_4V, EVEX_V256; 11153 } 11154} 11155 11156defm VALIGND: avx512_valign_common<"valignd", SchedWriteShuffle, 11157 avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; 11158defm VALIGNQ: avx512_valign_common<"valignq", SchedWriteShuffle, 11159 avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, 11160 VEX_W; 11161 11162defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr", 11163 SchedWriteShuffle, avx512vl_i8_info, 11164 avx512vl_i8_info>, EVEX_CD8<8, CD8VF>; 11165 11166// Fragments to help convert valignq into masked valignd. Or valignq/valignd 11167// into vpalignr. 11168def ValignqImm32XForm : SDNodeXForm<timm, [{ 11169 return getI8Imm(N->getZExtValue() * 2, SDLoc(N)); 11170}]>; 11171def ValignqImm8XForm : SDNodeXForm<timm, [{ 11172 return getI8Imm(N->getZExtValue() * 8, SDLoc(N)); 11173}]>; 11174def ValigndImm8XForm : SDNodeXForm<timm, [{ 11175 return getI8Imm(N->getZExtValue() * 4, SDLoc(N)); 11176}]>; 11177 11178multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode, 11179 X86VectorVTInfo From, X86VectorVTInfo To, 11180 SDNodeXForm ImmXForm> { 11181 def : Pat<(To.VT (vselect_mask To.KRCWM:$mask, 11182 (bitconvert 11183 (From.VT (OpNode From.RC:$src1, From.RC:$src2, 11184 timm:$src3))), 11185 To.RC:$src0)), 11186 (!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask, 11187 To.RC:$src1, To.RC:$src2, 11188 (ImmXForm timm:$src3))>; 11189 11190 def : Pat<(To.VT (vselect_mask To.KRCWM:$mask, 11191 (bitconvert 11192 (From.VT (OpNode From.RC:$src1, From.RC:$src2, 11193 timm:$src3))), 11194 To.ImmAllZerosV)), 11195 (!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask, 11196 To.RC:$src1, To.RC:$src2, 11197 (ImmXForm timm:$src3))>; 11198 11199 def : Pat<(To.VT (vselect_mask To.KRCWM:$mask, 11200 (bitconvert 11201 (From.VT (OpNode From.RC:$src1, 11202 (From.LdFrag addr:$src2), 11203 timm:$src3))), 11204 To.RC:$src0)), 11205 
(!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask, 11206 To.RC:$src1, addr:$src2, 11207 (ImmXForm timm:$src3))>; 11208 11209 def : Pat<(To.VT (vselect_mask To.KRCWM:$mask, 11210 (bitconvert 11211 (From.VT (OpNode From.RC:$src1, 11212 (From.LdFrag addr:$src2), 11213 timm:$src3))), 11214 To.ImmAllZerosV)), 11215 (!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask, 11216 To.RC:$src1, addr:$src2, 11217 (ImmXForm timm:$src3))>; 11218} 11219 11220multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode, 11221 X86VectorVTInfo From, 11222 X86VectorVTInfo To, 11223 SDNodeXForm ImmXForm> : 11224 avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> { 11225 def : Pat<(From.VT (OpNode From.RC:$src1, 11226 (bitconvert (To.VT (To.BroadcastLdFrag addr:$src2))), 11227 timm:$src3)), 11228 (!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2, 11229 (ImmXForm timm:$src3))>; 11230 11231 def : Pat<(To.VT (vselect_mask To.KRCWM:$mask, 11232 (bitconvert 11233 (From.VT (OpNode From.RC:$src1, 11234 (bitconvert 11235 (To.VT (To.BroadcastLdFrag addr:$src2))), 11236 timm:$src3))), 11237 To.RC:$src0)), 11238 (!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask, 11239 To.RC:$src1, addr:$src2, 11240 (ImmXForm timm:$src3))>; 11241 11242 def : Pat<(To.VT (vselect_mask To.KRCWM:$mask, 11243 (bitconvert 11244 (From.VT (OpNode From.RC:$src1, 11245 (bitconvert 11246 (To.VT (To.BroadcastLdFrag addr:$src2))), 11247 timm:$src3))), 11248 To.ImmAllZerosV)), 11249 (!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask, 11250 To.RC:$src1, addr:$src2, 11251 (ImmXForm timm:$src3))>; 11252} 11253 11254let Predicates = [HasAVX512] in { 11255 // For 512-bit we lower to the widest element type we can. So we only need 11256 // to handle converting valignq to valignd. 
11257 defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ", X86VAlign, v8i64_info, 11258 v16i32_info, ValignqImm32XForm>; 11259} 11260 11261let Predicates = [HasVLX] in { 11262 // For 128-bit we lower to the widest element type we can. So we only need 11263 // to handle converting valignq to valignd. 11264 defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ128", X86VAlign, v2i64x_info, 11265 v4i32x_info, ValignqImm32XForm>; 11266 // For 256-bit we lower to the widest element type we can. So we only need 11267 // to handle converting valignq to valignd. 11268 defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ256", X86VAlign, v4i64x_info, 11269 v8i32x_info, ValignqImm32XForm>; 11270} 11271 11272let Predicates = [HasVLX, HasBWI] in { 11273 // We can turn 128 and 256 bit VALIGND/VALIGNQ into VPALIGNR. 11274 defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v2i64x_info, 11275 v16i8x_info, ValignqImm8XForm>; 11276 defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v4i32x_info, 11277 v16i8x_info, ValigndImm8XForm>; 11278} 11279 11280defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw", 11281 SchedWritePSADBW, avx512vl_i16_info, avx512vl_i8_info>, 11282 EVEX_CD8<8, CD8VF>, NotEVEX2VEXConvertible; 11283 11284multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, 11285 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 11286 let ExeDomain = _.ExeDomain in { 11287 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 11288 (ins _.RC:$src1), OpcodeStr, 11289 "$src1", "$src1", 11290 (_.VT (OpNode (_.VT _.RC:$src1)))>, EVEX, AVX5128IBase, 11291 Sched<[sched]>; 11292 11293 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 11294 (ins _.MemOp:$src1), OpcodeStr, 11295 "$src1", "$src1", 11296 (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1)))))>, 11297 EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>, 11298 Sched<[sched.Folded]>; 11299 } 11300} 11301 11302multiclass avx512_unary_rmb<bits<8> opc, string 
OpcodeStr, SDNode OpNode, 11303 X86FoldableSchedWrite sched, X86VectorVTInfo _> : 11304 avx512_unary_rm<opc, OpcodeStr, OpNode, sched, _> { 11305 defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 11306 (ins _.ScalarMemOp:$src1), OpcodeStr, 11307 "${src1}"#_.BroadcastStr, 11308 "${src1}"#_.BroadcastStr, 11309 (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src1))))>, 11310 EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, 11311 Sched<[sched.Folded]>; 11312} 11313 11314multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode, 11315 X86SchedWriteWidths sched, 11316 AVX512VLVectorVTInfo VTInfo, Predicate prd> { 11317 let Predicates = [prd] in 11318 defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>, 11319 EVEX_V512; 11320 11321 let Predicates = [prd, HasVLX] in { 11322 defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>, 11323 EVEX_V256; 11324 defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>, 11325 EVEX_V128; 11326 } 11327} 11328 11329multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode, 11330 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo, 11331 Predicate prd> { 11332 let Predicates = [prd] in 11333 defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>, 11334 EVEX_V512; 11335 11336 let Predicates = [prd, HasVLX] in { 11337 defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>, 11338 EVEX_V256; 11339 defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>, 11340 EVEX_V128; 11341 } 11342} 11343 11344multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr, 11345 SDNode OpNode, X86SchedWriteWidths sched, 11346 Predicate prd> { 11347 defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, sched, 11348 avx512vl_i64_info, prd>, VEX_W; 11349 defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, sched, 11350 avx512vl_i32_info, 
prd>; 11351} 11352 11353multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr, 11354 SDNode OpNode, X86SchedWriteWidths sched, 11355 Predicate prd> { 11356 defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, sched, 11357 avx512vl_i16_info, prd>, VEX_WIG; 11358 defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, sched, 11359 avx512vl_i8_info, prd>, VEX_WIG; 11360} 11361 11362multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w, 11363 bits<8> opc_d, bits<8> opc_q, 11364 string OpcodeStr, SDNode OpNode, 11365 X86SchedWriteWidths sched> { 11366 defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, sched, 11367 HasAVX512>, 11368 avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, sched, 11369 HasBWI>; 11370} 11371 11372defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs, 11373 SchedWriteVecALU>; 11374 11375// VPABS: Use 512bit version to implement 128/256 bit in case NoVLX. 11376let Predicates = [HasAVX512, NoVLX] in { 11377 def : Pat<(v4i64 (abs VR256X:$src)), 11378 (EXTRACT_SUBREG 11379 (VPABSQZrr 11380 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)), 11381 sub_ymm)>; 11382 def : Pat<(v2i64 (abs VR128X:$src)), 11383 (EXTRACT_SUBREG 11384 (VPABSQZrr 11385 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)), 11386 sub_xmm)>; 11387} 11388 11389// Use 512bit version to implement 128/256 bit. 
// Selection patterns that implement a 256-bit or 128-bit unary operation with
// the 512-bit register-register instruction.
//
//   InstrStr - instruction name prefix; the instruction selected is the
//              record named InstrStr # "Zrr".
//   OpNode   - the unary node being matched.
//   _        - vector type info triple (info512 / info256 / info128).
//   prd      - feature predicate required in addition to NoVLX.
//
// Each pattern inserts the narrow source into an IMPLICIT_DEF 512-bit value
// with INSERT_SUBREG, runs the 512-bit instruction, and pulls the low part
// back out with EXTRACT_SUBREG using the narrow type's SubRegIdx. The upper
// part of the widened input is therefore undefined.
// NOTE(review): that is only sound when OpNode works element by element --
// confirm before instantiating this with a new node.
multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
                                 AVX512VLVectorVTInfo _, Predicate prd> {
  let Predicates = [prd, NoVLX] in {
    // 256-bit operation performed in a 512-bit register.
    def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1))),
              (EXTRACT_SUBREG
                (!cast<Instruction>(InstrStr # "Zrr")
                  (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)),
                                 _.info256.RC:$src1,
                                 _.info256.SubRegIdx)),
                _.info256.SubRegIdx)>;

    // 128-bit operation performed in a 512-bit register.
    def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1))),
              (EXTRACT_SUBREG
                (!cast<Instruction>(InstrStr # "Zrr")
                  (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)),
                                 _.info128.RC:$src1,
                                 _.info128.SubRegIdx)),
                _.info128.SubRegIdx)>;
  }
}

// VPLZCNTD / VPLZCNTQ: matched from ctlz, gated on HasCDI. The D and Q forms
// share opcode 0x44.
defm VPLZCNT    : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz,
                                        SchedWriteVecIMul, HasCDI>;

// VPCONFLICTD / VPCONFLICTQ: matched from X86Conflict, gated on HasCDI. The D
// and Q forms share opcode 0xC4.
// FIXME: Is there a better scheduler class for VPCONFLICT?
defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict,
                                        SchedWriteVecALU, HasCDI>;

// VPLZCNT: Use 512bit version to implement 128/256 bit in case NoVLX.
defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>;
defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>;

//===---------------------------------------------------------------------===//
// Counts number of ones - VPOPCNTD and VPOPCNTQ
//===---------------------------------------------------------------------===//

// FIXME: Is there a better scheduler class for VPOPCNTD/VPOPCNTQ?
11427defm VPOPCNT : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop, 11428 SchedWriteVecALU, HasVPOPCNTDQ>; 11429 11430defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info, HasVPOPCNTDQ>; 11431defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info, HasVPOPCNTDQ>; 11432 11433//===---------------------------------------------------------------------===// 11434// Replicate Single FP - MOVSHDUP and MOVSLDUP 11435//===---------------------------------------------------------------------===// 11436 11437multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode, 11438 X86SchedWriteWidths sched> { 11439 defm NAME: avx512_unary_rm_vl<opc, OpcodeStr, OpNode, sched, 11440 avx512vl_f32_info, HasAVX512>, XS; 11441} 11442 11443defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup, 11444 SchedWriteFShuffle>; 11445defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup, 11446 SchedWriteFShuffle>; 11447 11448//===----------------------------------------------------------------------===// 11449// AVX-512 - MOVDDUP 11450//===----------------------------------------------------------------------===// 11451 11452multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr, 11453 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 11454 let ExeDomain = _.ExeDomain in { 11455 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 11456 (ins _.RC:$src), OpcodeStr, "$src", "$src", 11457 (_.VT (X86VBroadcast (_.VT _.RC:$src)))>, EVEX, 11458 Sched<[sched]>; 11459 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 11460 (ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src", 11461 (_.VT (_.BroadcastLdFrag addr:$src))>, 11462 EVEX, EVEX_CD8<_.EltSize, CD8VH>, 11463 Sched<[sched.Folded]>; 11464 } 11465} 11466 11467multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr, 11468 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo> { 11469 defm Z : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.ZMM, 11470 
VTInfo.info512>, EVEX_V512; 11471 11472 let Predicates = [HasAVX512, HasVLX] in { 11473 defm Z256 : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.YMM, 11474 VTInfo.info256>, EVEX_V256; 11475 defm Z128 : avx512_movddup_128<opc, OpcodeStr, sched.XMM, 11476 VTInfo.info128>, EVEX_V128; 11477 } 11478} 11479 11480multiclass avx512_movddup<bits<8> opc, string OpcodeStr, 11481 X86SchedWriteWidths sched> { 11482 defm NAME: avx512_movddup_common<opc, OpcodeStr, sched, 11483 avx512vl_f64_info>, XD, VEX_W; 11484} 11485 11486defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", SchedWriteFShuffle>; 11487 11488let Predicates = [HasVLX] in { 11489def : Pat<(v2f64 (X86VBroadcast f64:$src)), 11490 (VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>; 11491 11492def : Pat<(vselect_mask (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)), 11493 (v2f64 VR128X:$src0)), 11494 (VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask, 11495 (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>; 11496def : Pat<(vselect_mask (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)), 11497 immAllZerosV), 11498 (VMOVDDUPZ128rrkz VK2WM:$mask, (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>; 11499} 11500 11501//===----------------------------------------------------------------------===// 11502// AVX-512 - Unpack Instructions 11503//===----------------------------------------------------------------------===// 11504 11505let Uses = []<Register>, mayRaiseFPException = 0 in { 11506defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, X86Unpckh, HasAVX512, 11507 SchedWriteFShuffleSizes, 0, 1>; 11508defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, X86Unpckl, HasAVX512, 11509 SchedWriteFShuffleSizes>; 11510} 11511 11512defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl, 11513 SchedWriteShuffle, HasBWI>; 11514defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh, 11515 SchedWriteShuffle, HasBWI>; 11516defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", 
X86Unpckl, 11517 SchedWriteShuffle, HasBWI>; 11518defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh, 11519 SchedWriteShuffle, HasBWI>; 11520 11521defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl, 11522 SchedWriteShuffle, HasAVX512>; 11523defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh, 11524 SchedWriteShuffle, HasAVX512>; 11525defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl, 11526 SchedWriteShuffle, HasAVX512>; 11527defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh, 11528 SchedWriteShuffle, HasAVX512>; 11529 11530//===----------------------------------------------------------------------===// 11531// AVX-512 - Extract & Insert Integer Instructions 11532//===----------------------------------------------------------------------===// 11533 11534multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode, 11535 X86VectorVTInfo _> { 11536 def mr : AVX512Ii8<opc, MRMDestMem, (outs), 11537 (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2), 11538 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", 11539 [(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), timm:$src2))), 11540 addr:$dst)]>, 11541 EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecExtractSt]>; 11542} 11543 11544multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> { 11545 let Predicates = [HasBWI] in { 11546 def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst), 11547 (ins _.RC:$src1, u8imm:$src2), 11548 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", 11549 [(set GR32orGR64:$dst, 11550 (X86pextrb (_.VT _.RC:$src1), timm:$src2))]>, 11551 EVEX, TAPD, Sched<[WriteVecExtract]>; 11552 11553 defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD; 11554 } 11555} 11556 11557multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> { 11558 let Predicates = [HasBWI] in { 11559 def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst), 
11560 (ins _.RC:$src1, u8imm:$src2), 11561 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", 11562 [(set GR32orGR64:$dst, 11563 (X86pextrw (_.VT _.RC:$src1), timm:$src2))]>, 11564 EVEX, PD, Sched<[WriteVecExtract]>; 11565 11566 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in 11567 def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst), 11568 (ins _.RC:$src1, u8imm:$src2), 11569 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, 11570 EVEX, TAPD, FoldGenData<NAME#rr>, 11571 Sched<[WriteVecExtract]>; 11572 11573 defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD; 11574 } 11575} 11576 11577multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _, 11578 RegisterClass GRC> { 11579 let Predicates = [HasDQI] in { 11580 def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst), 11581 (ins _.RC:$src1, u8imm:$src2), 11582 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", 11583 [(set GRC:$dst, 11584 (extractelt (_.VT _.RC:$src1), imm:$src2))]>, 11585 EVEX, TAPD, Sched<[WriteVecExtract]>; 11586 11587 def mr : AVX512Ii8<0x16, MRMDestMem, (outs), 11588 (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2), 11589 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", 11590 [(store (extractelt (_.VT _.RC:$src1), 11591 imm:$src2),addr:$dst)]>, 11592 EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD, 11593 Sched<[WriteVecExtractSt]>; 11594 } 11595} 11596 11597defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>, VEX_WIG; 11598defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>, VEX_WIG; 11599defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>; 11600defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, VEX_W; 11601 11602multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode, 11603 X86VectorVTInfo _, PatFrag LdFrag, 11604 SDPatternOperator immoperator> { 11605 def rm : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst), 11606 (ins _.RC:$src1, _.ScalarMemOp:$src2, 
u8imm:$src3), 11607 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 11608 [(set _.RC:$dst, 11609 (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), immoperator:$src3)))]>, 11610 EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>; 11611} 11612 11613multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode, 11614 X86VectorVTInfo _, PatFrag LdFrag> { 11615 let Predicates = [HasBWI] in { 11616 def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst), 11617 (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3), 11618 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 11619 [(set _.RC:$dst, 11620 (OpNode _.RC:$src1, GR32orGR64:$src2, timm:$src3))]>, EVEX_4V, 11621 Sched<[WriteVecInsert]>; 11622 11623 defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag, timm>; 11624 } 11625} 11626 11627multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr, 11628 X86VectorVTInfo _, RegisterClass GRC> { 11629 let Predicates = [HasDQI] in { 11630 def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst), 11631 (ins _.RC:$src1, GRC:$src2, u8imm:$src3), 11632 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 11633 [(set _.RC:$dst, 11634 (_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>, 11635 EVEX_4V, TAPD, Sched<[WriteVecInsert]>; 11636 11637 defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _, 11638 _.ScalarLdFrag, imm>, TAPD; 11639 } 11640} 11641 11642defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info, 11643 extloadi8>, TAPD, VEX_WIG; 11644defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info, 11645 extloadi16>, PD, VEX_WIG; 11646defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>; 11647defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, VEX_W; 11648 11649//===----------------------------------------------------------------------===// 11650// VSHUFPS - VSHUFPD 
Operations
//===----------------------------------------------------------------------===//

// Packed FP shuffle with an 8-bit immediate (opcode 0xC6). Forwards to
// avx512_common_3Op_imm8 and attaches the encoding mix-ins; the compressed
// displacement element size is taken from the 512-bit type info.
multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_FP>{
  defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp,
                                    SchedWriteFShuffle>,
                                    EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
                                    AVX512AIi8Base, EVEX_4V;
}

defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_f32_info>, PS;
defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_f64_info>, PD, VEX_W;

//===----------------------------------------------------------------------===//
// AVX-512 - Byte shift Left/Right
//===----------------------------------------------------------------------===//

// One register form (ri) and one memory form (mi) of a shift that takes a
// vector source and an 8-bit immediate. MRMr/MRMm select the ModRM format of
// the two forms; `_` supplies register class, value type and load fragment.
multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
                               Format MRMm, string OpcodeStr,
                               X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  def ri : AVX512<opc, MRMr,
             (outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 timm:$src2))))]>,
             Sched<[sched]>;
  def mi : AVX512<opc, MRMm,
           (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
           [(set _.RC:$dst,(_.VT (OpNode
                                 (_.VT (bitconvert (_.LdFrag addr:$src1))),
                                 (i8 timm:$src2))))]>,
           Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// Instantiates avx512_shift_packed for the 512-bit type under `prd` and for
// the 256/128-bit types under `prd` + HasVLX. All widths use byte vectors.
multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
                                   Format MRMm, string OpcodeStr,
                                   X86SchedWriteWidths sched, Predicate prd>{
  let Predicates = [prd] in
    defm Z : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                 sched.ZMM, v64i8_info>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                    sched.YMM, v32i8x_info>, EVEX_V256;
    defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                    sched.XMM, v16i8x_info>, EVEX_V128;
  }
}
defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq",
                                       SchedWriteShuffle, HasBWI>,
                                       AVX512PDIi8Base, EVEX_4V, VEX_WIG;
defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq",
                                       SchedWriteShuffle, HasBWI>,
                                       AVX512PDIi8Base, EVEX_4V, VEX_WIG;

// Two-source operation whose result type (_dst) differs from its source type
// (_src). Defines a commutable register-register form (rr) and a
// register-memory form (rm).
multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
                                string OpcodeStr, X86FoldableSchedWrite sched,
                                X86VectorVTInfo _dst, X86VectorVTInfo _src> {
  let isCommutable = 1 in
  def rr : AVX512BI<opc, MRMSrcReg,
             (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.RC:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _dst.RC:$dst,(_dst.VT
                                (OpNode (_src.VT _src.RC:$src1),
                                        (_src.VT _src.RC:$src2))))]>,
             Sched<[sched]>;
  def rm : AVX512BI<opc, MRMSrcMem,
           (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.MemOp:$src2),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
           [(set _dst.RC:$dst,(_dst.VT
                              (OpNode (_src.VT _src.RC:$src1),
                              (_src.VT (bitconvert
                                        (_src.LdFrag addr:$src2))))))]>,
           Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// Instantiates avx512_psadbw_packed for all three vector widths: i64 element
// results from i8 element sources. 256/128-bit forms additionally need HasVLX.
multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
                                    string OpcodeStr, X86SchedWriteWidths sched,
                                    Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.ZMM,
                                  v8i64_info, v64i8_info>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.YMM,
                                     v4i64x_info, v32i8x_info>, EVEX_V256;
    defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.XMM,
                                     v2i64x_info, v16i8x_info>, EVEX_V128;
  }
}

defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
                                        SchedWritePSADBW, HasBWI>, EVEX_4V, VEX_WIG;

// Transforms to swizzle an immediate to enable better matching when
// memory operand isn't in the right place.
//
// Bit i of a VPTERNLOG immediate is the result for the input combination
// whose index is (operand0 << 2) | (operand1 << 1) | operand2. Reordering the
// operands therefore permutes the immediate's bits; bits 0 and 7 (all inputs
// equal) never move. Operands are numbered from 0 in the comments below.
def VPTERNLOG321_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by swapping operand 0 and operand 2.
  uint8_t Imm = N->getZExtValue();
  // Swap bits 1/4 and 3/6.
  uint8_t NewImm = Imm & 0xa5;
  if (Imm & 0x02) NewImm |= 0x10;
  if (Imm & 0x10) NewImm |= 0x02;
  if (Imm & 0x08) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG213_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by swapping operand 0 and operand 1.
  uint8_t Imm = N->getZExtValue();
  // Swap bits 2/4 and 3/5.
  uint8_t NewImm = Imm & 0xc3;
  if (Imm & 0x04) NewImm |= 0x10;
  if (Imm & 0x10) NewImm |= 0x04;
  if (Imm & 0x08) NewImm |= 0x20;
  if (Imm & 0x20) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG132_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2.
  uint8_t Imm = N->getZExtValue();
  // Swap bits 1/2 and 5/6.
  uint8_t NewImm = Imm & 0x99;
  if (Imm & 0x02) NewImm |= 0x04;
  if (Imm & 0x04) NewImm |= 0x02;
  if (Imm & 0x20) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x20;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG231_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by moving operand 0 to the end, i.e. the
  // immediate for node operands (A, B, C) becomes the one for (B, C, A).
  uint8_t Imm = N->getZExtValue();
  // Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5
  uint8_t NewImm = Imm & 0x81;
  if (Imm & 0x02) NewImm |= 0x04;
  if (Imm & 0x04) NewImm |= 0x10;
  if (Imm & 0x08) NewImm |= 0x40;
  if (Imm & 0x10) NewImm |= 0x02;
  if (Imm & 0x20) NewImm |= 0x08;
  if (Imm & 0x40) NewImm |= 0x20;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG312_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by moving operand 2 to the beginning, i.e.
  // the immediate for node operands (A, B, C) becomes the one for (C, A, B).
  uint8_t Imm = N->getZExtValue();
  // Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3
  uint8_t NewImm = Imm & 0x81;
  if (Imm & 0x02) NewImm |= 0x10;
  if (Imm & 0x04) NewImm |= 0x02;
  if (Imm & 0x08) NewImm |= 0x20;
  if (Imm & 0x10) NewImm |= 0x04;
  if (Imm & 0x20) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;

// Three-source ternary-logic instruction with an 8-bit truth-table immediate.
// $src1 is tied to $dst. Defines register (rri), full-vector load (rmi) and
// broadcast load (rmbi) forms through AVX512_maskable_3src, followed by extra
// selection patterns for masked nodes whose operands appear in a different
// order than the instruction expects; those patterns rewrite the immediate
// with the VPTERNLOG*_imm8 transforms above.
//
// Name is the instruction name prefix used to look up the generated
// instructions (Name # _.ZSuffix # form).
multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          X86FoldableSchedWrite sched, X86VectorVTInfo _,
                          string Name>{
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
                      OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_.VT _.RC:$src3),
                              (i8 timm:$src4)), 1, 1>,
                      AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
  defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
                    OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT _.RC:$src2),
                            (_.VT (bitconvert (_.LdFrag addr:$src3))),
                            (i8 timm:$src4)), 1, 0>,
                    AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
                    OpcodeStr, "$src4, ${src3}"#_.BroadcastStr#", $src2",
                    "$src2, ${src3}"#_.BroadcastStr#", $src4",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT _.RC:$src2),
                            (_.VT (_.BroadcastLdFrag addr:$src3)),
                            (i8 timm:$src4)), 1, 0>, EVEX_B,
                    AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  }// Constraints = "$src1 = $dst"

  // Additional patterns for matching passthru operand in other positions.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 timm:$src4))>;

  // Additional patterns for matching zero masking with loads in other
  // positions.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, (i8 timm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;

  // Additional patterns for matching masked loads with different
  // operand orders.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1,
                    (bitconvert (_.LdFrag addr:$src3)), (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
  // The node is (src2, mem, src1) and the instruction computes on
  // (src1, src2, mem): the node's last operand moves to the front, which is
  // the VPTERNLOG312_imm8 rotation. E.g. imm 0xF0 ("result = src2") must
  // become 0xCC.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src1, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;
  // The node is (mem, src1, src2) and the instruction computes on
  // (src1, src2, mem): the node's first operand moves to the end, which is
  // the VPTERNLOG231_imm8 rotation. E.g. imm 0xF0 ("result = mem") must
  // become 0xAA.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;

  // Additional patterns for matching zero masking with broadcasts in other
  // positions.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
             _.KRCWM:$mask, _.RC:$src2, addr:$src3,
             (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src1,
                    (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src2, (i8 timm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
             _.KRCWM:$mask, _.RC:$src2, addr:$src3,
             (VPTERNLOG132_imm8 timm:$src4))>;

  // Additional patterns for matching masked broadcasts with different
  // operand orders.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src1, (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src2, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1,
                    (_.BroadcastLdFrag addr:$src3),
                    (i8 timm:$src4)), _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
  // Node (src2, mem, src1) -> instruction (src1, src2, mem): last operand
  // moves to the front, so the immediate needs the 312 rotation.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src2,
                    (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src1, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;
  // Node (mem, src1, src2) -> instruction (src1, src2, mem): first operand
  // moves to the end, so the immediate needs the 231 rotation.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
}

// Instantiates avx512_ternlog (opcode 0x25, X86vpternlog) for the 512-bit
// type under HasAVX512 and for the 128/256-bit types under HasAVX512 + HasVLX.
multiclass avx512_common_ternlog<string OpcodeStr, X86SchedWriteWidths sched,
                                 AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in
    defm Z    : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.ZMM,
                               _.info512, NAME>, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.XMM,
                               _.info128, NAME>, EVEX_V128;
    defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.YMM,
                               _.info256, NAME>, EVEX_V256;
  }
}

defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SchedWriteVecALU,
                                        avx512vl_i32_info>;
defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU,
                                        avx512vl_i64_info>, VEX_W;

// Patterns to implement vnot using vpternlog instead of creating all ones
// using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen
// so that the result is only dependent on src0. But we use the same source
// for all operands to prevent a false dependency.
// TODO: We should maybe have a more generalized algorithm for folding to
// vpternlog.
// vnot of any 512-bit integer vector type selects VPTERNLOGQZrri with the
// source in all three operand positions and immediate 15.
let Predicates = [HasAVX512] in {
  def : Pat<(v64i8 (vnot VR512:$src)),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
  def : Pat<(v32i16 (vnot VR512:$src)),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
  def : Pat<(v16i32 (vnot VR512:$src)),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
  def : Pat<(v8i64 (vnot VR512:$src)),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
}

// Without VLX the 128/256-bit cases are widened: the source is inserted into
// an otherwise undefined 512-bit register, the 512-bit instruction is used,
// and the low subregister of the result is extracted.
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v16i8 (vnot VR128X:$src)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (i8 15)), sub_xmm)>;
  def : Pat<(v8i16 (vnot VR128X:$src)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (i8 15)), sub_xmm)>;
  def : Pat<(v4i32 (vnot VR128X:$src)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (i8 15)), sub_xmm)>;
  def : Pat<(v2i64 (vnot VR128X:$src)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (i8 15)), sub_xmm)>;

  def : Pat<(v32i8 (vnot VR256X:$src)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (i8 15)), sub_ymm)>;
  def : Pat<(v16i16 (vnot VR256X:$src)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (i8 15)), sub_ymm)>;
  def : Pat<(v8i32 (vnot VR256X:$src)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (i8 15)), sub_ymm)>;
  def : Pat<(v4i64 (vnot VR256X:$src)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (i8 15)), sub_ymm)>;
}

// With VLX the native 128/256-bit instructions are used directly.
let Predicates = [HasVLX] in {
  def : Pat<(v16i8 (vnot VR128X:$src)),
            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
  def : Pat<(v8i16 (vnot VR128X:$src)),
            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
  def : Pat<(v4i32 (vnot VR128X:$src)),
            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
  def : Pat<(v2i64 (vnot VR128X:$src)),
            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;

  def : Pat<(v32i8 (vnot VR256X:$src)),
            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
  def : Pat<(v16i16 (vnot VR256X:$src)),
            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
  def : Pat<(v8i32 (vnot VR256X:$src)),
            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
  def : Pat<(v4i64 (vnot VR256X:$src)),
            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - FixupImm
//===----------------------------------------------------------------------===//

// Packed fixupimm: register (rri), full-vector load (rmi) and broadcast load
// (rmbi) forms. $src1 is tied to $dst. `_` types $src1/$src2 and the result;
// TblVT types $src3 (presumably the fix-up table operand, going by the name).
// All three forms read MXCSR and are marked as possibly raising FP exceptions.
multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                  X86VectorVTInfo TblVT>{
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
      Uses = [MXCSR], mayRaiseFPException = 1 in {
    defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                         OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                        (X86VFixupimm (_.VT _.RC:$src1),
                                      (_.VT _.RC:$src2),
                                      (TblVT.VT _.RC:$src3),
                                      (i32 timm:$src4))>, Sched<[sched]>;
    defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
                      OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (X86VFixupimm (_.VT _.RC:$src1),
                                    (_.VT _.RC:$src2),
                                    (TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))),
                                    (i32 timm:$src4))>,
                      Sched<[sched.Folded, sched.ReadAfterFold]>;
    defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
                    OpcodeStr#_.Suffix, "$src4, ${src3}"#_.BroadcastStr#", $src2",
                    "$src2, ${src3}"#_.BroadcastStr#", $src4",
                      (X86VFixupimm (_.VT _.RC:$src1),
                                    (_.VT _.RC:$src2),
                                    (TblVT.VT (TblVT.BroadcastLdFrag addr:$src3)),
                                    (i32 timm:$src4))>,
                    EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  } // Constraints = "$src1 = $dst"
}

// Everything from avx512_fixupimm_packed plus a register-only {sae} form
// (rrib) selected from X86VFixupimmSAE. The rrib form reads MXCSR but, unlike
// the inherited forms, is not marked mayRaiseFPException.
multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
                                      X86FoldableSchedWrite sched,
                                      X86VectorVTInfo _, X86VectorVTInfo TblVT>
  : avx512_fixupimm_packed<opc, OpcodeStr, sched, _, TblVT> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
  defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                      OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2",
                      "$src2, $src3, {sae}, $src4",
                      (X86VFixupimmSAE (_.VT _.RC:$src1),
                                       (_.VT _.RC:$src2),
                                       (TblVT.VT _.RC:$src3),
                                       (i32 timm:$src4))>,
                      EVEX_B, Sched<[sched]>;
  }
}

// Scalar fixupimm: register (rri), register {sae} (rrib) and scalar-load
// (rmi) forms, all under HasAVX512 with $src1 tied to $dst. _src3VT types
// $src3.
multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                  X86VectorVTInfo _src3VT> {
  let Constraints = "$src1 = $dst" , Predicates = [HasAVX512],
      ExeDomain = _.ExeDomain in {
    defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                      OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (X86VFixupimms (_.VT _.RC:$src1),
                                     (_.VT _.RC:$src2),
                                     (_src3VT.VT _src3VT.RC:$src3),
                                     (i32 timm:$src4))>, Sched<[sched]>, SIMD_EXC;
    // NOTE(review): this register-only form is scheduled with the folded-load
    // classes, whereas rrib in avx512_fixupimm_packed_sae uses Sched<[sched]>
    // -- confirm whether that is intentional.
    let Uses = [MXCSR] in
    defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                      OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2",
                      "$src2, $src3, {sae}, $src4",
                      (X86VFixupimmSAEs (_.VT _.RC:$src1),
                                        (_.VT _.RC:$src2),
                                        (_src3VT.VT _src3VT.RC:$src3),
                                        (i32 timm:$src4))>,
                      EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
    defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                     (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
                     OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                     (X86VFixupimms (_.VT _.RC:$src1),
                                    (_.VT _.RC:$src2),
                                    (_src3VT.VT (scalar_to_vector
                                              (_src3VT.ScalarLdFrag addr:$src3))),
                                    (i32 timm:$src4))>,
                     Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  }
}

// Packed fixupimm (opcode 0x54) at all three widths. Only the 512-bit
// instantiation gets the {sae} form; 128/256-bit additionally need HasVLX.
multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched,
                                      AVX512VLVectorVTInfo _Vec,
                                      AVX512VLVectorVTInfo _Tbl> {
  let Predicates = [HasAVX512] in
    defm Z    : avx512_fixupimm_packed_sae<0x54, "vfixupimm", sched.ZMM,
                                _Vec.info512, _Tbl.info512>, AVX512AIi8Base,
                                EVEX_4V, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.XMM,
                            _Vec.info128, _Tbl.info128>, AVX512AIi8Base,
                            EVEX_4V, EVEX_V128;
    defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.YMM,
                            _Vec.info256, _Tbl.info256>, AVX512AIi8Base,
                            EVEX_4V, EVEX_V256;
  }
}

defm VFIXUPIMMSSZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
                                           SchedWriteFAdd.Scl, f32x_info, v4i32x_info>,
                          AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
defm VFIXUPIMMSDZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
                                           SchedWriteFAdd.Scl, f64x_info, v2i64x_info>,
                          AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f32_info,
                         avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f64_info,
                         avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;

// Patterns used to select SSE scalar fp arithmetic instructions from
// either:
//
// (1) a scalar fp operation followed by a blend
//
// The effect is that the backend no longer emits unnecessary vector
// insert instructions immediately after SSE scalar fp instructions
// like addss or mulss.
//
// For example, given the following code:
//   __m128 foo(__m128 A, __m128 B) {
//     A[0] += B[0];
//     return A;
//   }
//
// Previously we generated:
//   addss %xmm0, %xmm1
//   movss %xmm1, %xmm0
//
// We now generate:
//   addss %xmm1, %xmm0
//
// (2) a vector packed single/double fp operation followed by a vector insert
//
// The effect is that the backend converts the packed fp instruction
// followed by a vector insert into a single SSE scalar fp instruction.
//
// For example, given the following code:
//   __m128 foo(__m128 A, __m128 B) {
//     __m128 C = A + B;
//     return (__m128) {C[0], A[1], A[2], A[3]};
//   }
//
// Previously we generated:
//   addps %xmm0, %xmm1
//   movss %xmm1, %xmm0
//
// We now generate:
//   addss %xmm1, %xmm0

// TODO: Some canonicalization in lowering would simplify the number of
// patterns we have to try to match.
// Selection patterns that fold "extract element 0, apply a scalar FP op,
// re-insert via MoveNode" into the corresponding V<OpcPrefix>Z*_Int
// instruction. Op is used for the unmasked patterns, MaskedOp for the
// patterns wrapped in X86selects_mask; ZeroFP is the zero constant matched by
// the zero-masking patterns. Each case has a register and a scalar-load
// variant.
multiclass AVX512_scalar_math_fp_patterns<SDPatternOperator Op, SDNode MaskedOp,
                                          string OpcPrefix, SDNode MoveNode,
                                          X86VectorVTInfo _, PatLeaf ZeroFP> {
  let Predicates = [HasAVX512] in {
    // extracted scalar math op with insert via movss
    def : Pat<(MoveNode
               (_.VT VR128X:$dst),
               (_.VT (scalar_to_vector
                      (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
                          _.FRC:$src)))),
              (!cast<Instruction>("V"#OpcPrefix#"Zrr_Int") _.VT:$dst,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src, VR128X)))>;
    def : Pat<(MoveNode
               (_.VT VR128X:$dst),
               (_.VT (scalar_to_vector
                      (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
                          (_.ScalarLdFrag addr:$src))))),
              (!cast<Instruction>("V"#OpcPrefix#"Zrm_Int") _.VT:$dst, addr:$src)>;

    // extracted masked scalar math op with insert via movss
    // (merge masking: $src0 is the value kept when the mask bit is clear)
    def : Pat<(MoveNode (_.VT VR128X:$src1),
               (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                            (MaskedOp (_.EltVT
                                       (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                      _.FRC:$src2),
                            _.FRC:$src0))),
              (!cast<Instruction>("V"#OpcPrefix#"Zrr_Intk")
               (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
               VK1WM:$mask, _.VT:$src1,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
    def : Pat<(MoveNode (_.VT VR128X:$src1),
               (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                            (MaskedOp (_.EltVT
                                       (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                      (_.ScalarLdFrag addr:$src2)),
                            _.FRC:$src0))),
              (!cast<Instruction>("V"#OpcPrefix#"Zrm_Intk")
               (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
               VK1WM:$mask, _.VT:$src1, addr:$src2)>;

    // extracted masked scalar math op with insert via movss
    // (zero masking: the select's false value is ZeroFP)
    // NOTE(review): these two patterns use !cast<I> where the patterns above
    // use !cast<Instruction> -- confirm whether the difference is intended.
    def : Pat<(MoveNode (_.VT VR128X:$src1),
               (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                            (MaskedOp (_.EltVT
                                       (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                      _.FRC:$src2), (_.EltVT ZeroFP)))),
      (!cast<I>("V"#OpcPrefix#"Zrr_Intkz")
          VK1WM:$mask, _.VT:$src1,
          (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
    def : Pat<(MoveNode (_.VT VR128X:$src1),
               (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                            (MaskedOp (_.EltVT
                                       (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                      (_.ScalarLdFrag addr:$src2)), (_.EltVT ZeroFP)))),
      (!cast<I>("V"#OpcPrefix#"Zrm_Intkz") VK1WM:$mask, _.VT:$src1, addr:$src2)>;
  }
}

defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSS", X86Movss, v4f32x_info, fp32imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSS", X86Movss, v4f32x_info, fp32imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSS", X86Movss, v4f32x_info, fp32imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSS", X86Movss, v4f32x_info, fp32imm0>;

defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSD", X86Movsd, v2f64x_info, fp64imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSD", X86Movsd, v2f64x_info, fp64imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSD", X86Movsd, v2f64x_info, fp64imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSD", X86Movsd, v2f64x_info, fp64imm0>;

defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSH", X86Movsh, v8f16x_info, fp16imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSH", X86Movsh, v8f16x_info, fp16imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSH", X86Movsh, v8f16x_info, fp16imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSH", X86Movsh, v8f16x_info, fp16imm0>;

// Selection pattern that folds "extract element 0 of $src, apply a unary
// scalar op, insert into $dst via Move" into the V<OpcPrefix>Zr_Int
// instruction.
multiclass AVX512_scalar_unary_math_patterns<SDPatternOperator OpNode, string OpcPrefix,
                                             SDNode Move, X86VectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    def : Pat<(_.VT (Move _.VT:$dst,
                     (scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))),
              (!cast<Instruction>("V"#OpcPrefix#"Zr_Int") _.VT:$dst, _.VT:$src)>;
  }
}

defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSS", X86Movss, v4f32x_info>;
defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSD", X86Movsd, v2f64x_info>;
defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSH", X86Movsh, v8f16x_info>;

//===----------------------------------------------------------------------===//
// AES instructions
//===----------------------------------------------------------------------===//

// EVEX-encoded AES round instructions at 128/256/512 bits, built from
// AESI_binop_rm_int. The intrinsic is looked up by name: IntPrefix for
// 128-bit, IntPrefix#"_256" and IntPrefix#"_512" for the wider forms.
multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> {
  let Predicates = [HasVLX, HasVAES] in {
    defm Z128 : AESI_binop_rm_int<Op, OpStr,
                                  !cast<Intrinsic>(IntPrefix),
                                  loadv2i64, 0, VR128X, i128mem>,
                  EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V128, VEX_WIG;
    defm Z256 : AESI_binop_rm_int<Op, OpStr,
                                  !cast<Intrinsic>(IntPrefix#"_256"),
                                  loadv4i64, 0, VR256X, i256mem>,
                  EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512, HasVAES] in
    defm Z    : AESI_binop_rm_int<Op, OpStr,
                                  !cast<Intrinsic>(IntPrefix#"_512"),
                                  loadv8i64, 0, VR512, i512mem>,
                  EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_WIG;
}

defm VAESENC      : avx512_vaes<0xDC, "vaesenc", "int_x86_aesni_aesenc">;
defm VAESENCLAST  : avx512_vaes<0xDD, "vaesenclast", "int_x86_aesni_aesenclast">;
defm VAESDEC      : avx512_vaes<0xDE, "vaesdec", "int_x86_aesni_aesdec">;
defm VAESDECLAST  : avx512_vaes<0xDF, "vaesdeclast", "int_x86_aesni_aesdeclast">;

//===----------------------------------------------------------------------===//
// PCLMUL instructions - Carry less multiplication
//===----------------------------------------------------------------------===//

let Predicates = [HasAVX512, HasVPCLMULQDQ] in
defm VPCLMULQDQZ : vpclmulqdq<VR512, i512mem, loadv8i64, int_x86_pclmulqdq_512>,
                              EVEX_4V, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_WIG;

let Predicates = [HasVLX, HasVPCLMULQDQ] in {
defm VPCLMULQDQZ128 : vpclmulqdq<VR128X, i128mem, loadv2i64, int_x86_pclmulqdq>,
                              EVEX_4V, EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_WIG;

defm VPCLMULQDQZ256: vpclmulqdq<VR256X, i256mem, loadv4i64,
                                int_x86_pclmulqdq_256>, EVEX_4V, EVEX_V256,
                                EVEX_CD8<64, CD8VF>, VEX_WIG;
}

// Aliases
defm : vpclmulqdq_aliases<"VPCLMULQDQZ", VR512, i512mem>;
defm : vpclmulqdq_aliases<"VPCLMULQDQZ128", VR128X, i128mem>;
defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>;

//===----------------------------------------------------------------------===//
// VBMI2
//===----------------------------------------------------------------------===//

// Three-operand variable shift with $src1 tied to $dst: register (r) and
// full-vector load (m) forms.
multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
  let Constraints = "$src1 = $dst",
      ExeDomain   = VTI.ExeDomain in {
    defm r:   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
                (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
                "$src3, $src2", "$src2, $src3",
                (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>,
                T8PD, EVEX_4V, Sched<[sched]>;
    defm m:   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
                "$src3, $src2", "$src2, $src3",
                (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
                        (VTI.VT (VTI.LdFrag addr:$src3))))>,
                T8PD, EVEX_4V,
                Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// VBMI2_shift_var_rm plus a broadcast-load form (mb).
multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
                               X86FoldableSchedWrite sched, X86VectorVTInfo VTI>
         : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched, VTI> {
  let Constraints = "$src1 = $dst",
      ExeDomain   = VTI.ExeDomain in
  defm mb:  AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
              (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr,
              "${src3}"#VTI.BroadcastStr#", $src2",
              "$src2, ${src3}"#VTI.BroadcastStr,
              (OpNode VTI.RC:$src1, VTI.RC:$src2,
               (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
              T8PD, EVEX_4V, EVEX_B,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// Width expansion of VBMI2_shift_var_rm: 512-bit under HasVBMI2, 256/128-bit
// under HasVBMI2 + HasVLX.
multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode,
                                     X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasVBMI2] in
  defm Z      : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
                                   EVEX_V512;
  let Predicates = [HasVBMI2, HasVLX] in {
    defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
                                   EVEX_V256;
    defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
                                   EVEX_V128;
  }
}

// Width expansion of VBMI2_shift_var_rmb (the variant with a broadcast form),
// using the same predicates as VBMI2_shift_var_rm_common.
multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode,
                                      X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasVBMI2] in
  defm Z      : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
                                    EVEX_V512;
  let Predicates = [HasVBMI2, HasVLX] in {
    defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
                                    EVEX_V256;
    defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
                                    EVEX_V128;
  }
}
// Word/dword/qword variable-shift families. The word form uses opcode wOp
// and has no broadcast form; dword and qword share opcode dqOp and are
// distinguished by VEX_W.
multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix,
                           SDNode OpNode, X86SchedWriteWidths sched> {
  defm W : VBMI2_shift_var_rm_common<wOp, Prefix#"w", OpNode, sched,
             avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>;
  defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix#"d", OpNode, sched,
             avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
  defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix#"q", OpNode, sched,
             avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
}

// Word/dword/qword immediate-shift families. The word form is built from
// avx512_common_3Op_rm_imm8, the dword/qword forms from
// avx512_common_3Op_imm8.
multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix,
                           SDNode OpNode, X86SchedWriteWidths sched> {
  defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix#"w", sched,
             avx512vl_i16_info, avx512vl_i16_info, HasVBMI2>,
             VEX_W, EVEX_CD8<16, CD8VF>;
  defm D : avx512_common_3Op_imm8<Prefix#"d", avx512vl_i32_info, dqOp,
             OpNode, sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
  defm Q : avx512_common_3Op_imm8<Prefix#"q", avx512vl_i64_info, dqOp, OpNode,
             sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
}

// Concat & Shift
defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SchedWriteVecIMul>;
defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SchedWriteVecIMul>;
defm VPSHLD  : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SchedWriteVecIMul>;
defm VPSHRD  : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SchedWriteVecIMul>;

// Compress
defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", WriteVarShuffle256,
                                         avx512vl_i8_info, HasVBMI2>, EVEX,
                                         NotMemoryFoldable;
defm VPCOMPRESSW : compress_by_elt_width <0x63, "vpcompressw", WriteVarShuffle256,
                                          avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W,
                                          NotMemoryFoldable;
// Expand
defm VPEXPANDB : expand_by_elt_width <0x62, "vpexpandb", WriteVarShuffle256,
                                      avx512vl_i8_info, HasVBMI2>, EVEX;
defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", WriteVarShuffle256,
                                      avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W;

//===----------------------------------------------------------------------===//
// VNNI
//===----------------------------------------------------------------------===//

// Three-operand VNNI instruction with $src1 tied to $dst: register (r),
// full-vector load (m) and broadcast load (mb) forms. IsCommutable is passed
// twice to AVX512_maskable_3src for the register form only; the memory forms
// use that multiclass's defaults.
let Constraints = "$src1 = $dst" in
multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
                    X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
                    bit IsCommutable> {
  let ExeDomain = VTI.ExeDomain in {
  defm r  :   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
                                   (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
                                   "$src3, $src2", "$src2, $src3",
                                   (VTI.VT (OpNode VTI.RC:$src1,
                                            VTI.RC:$src2, VTI.RC:$src3)),
                                   IsCommutable, IsCommutable>,
                                   EVEX_4V, T8PD, Sched<[sched]>;
  defm m  :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                                   (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
                                   "$src3, $src2", "$src2, $src3",
                                   (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
                                            (VTI.VT (VTI.LdFrag addr:$src3))))>,
                                   EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD,
                                   Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm mb :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                                   (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
                                   OpStr, "${src3}"#VTI.BroadcastStr#", $src2",
                                   "$src2, ${src3}"#VTI.BroadcastStr,
                                   (OpNode VTI.RC:$src1, VTI.RC:$src2,
                                    (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
                                   EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B,
                                   T8PD, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Width expansion of VNNI_rmb over i32 vectors: 512-bit under HasVNNI,
// 256/128-bit under HasVNNI + HasVLX.
multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode,
                       X86SchedWriteWidths sched, bit IsCommutable> {
  let Predicates = [HasVNNI] in
  defm Z      :   VNNI_rmb<Op, OpStr, OpNode, sched.ZMM, v16i32_info,
                           IsCommutable>, EVEX_V512;
  let Predicates = [HasVNNI, HasVLX] in {
    defm Z256 :   VNNI_rmb<Op, OpStr, OpNode, sched.YMM, v8i32x_info,
                           IsCommutable>, EVEX_V256;
    defm Z128 :   VNNI_rmb<Op, OpStr, OpNode, sched.XMM, v4i32x_info,
                           IsCommutable>, EVEX_V128;
  }
}

// FIXME: Is there a better scheduler class for VPDP?
defm VPDPBUSD  : VNNI_common<0x50, "vpdpbusd",  X86Vpdpbusd,
                             SchedWriteVecIMul, 0>;
defm VPDPBUSDS : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds,
                             SchedWriteVecIMul, 0>;
defm VPDPWSSD  : VNNI_common<0x52, "vpdpwssd",  X86Vpdpwssd,
                             SchedWriteVecIMul, 1>;
defm VPDPWSSDS : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds,
                             SchedWriteVecIMul, 1>;

// Patterns to match VPDPWSSD from existing instructions/intrinsics: an add
// whose second operand is an X86vpmaddwd_su node, with the multiply's last
// operand coming either from a register or from a load.

// 512-bit forms.
let Predicates = [HasVNNI] in {
  def : Pat<(v16i32 (add VR512:$src1,
                         (X86vpmaddwd_su VR512:$src2, VR512:$src3))),
            (VPDPWSSDZr VR512:$src1, VR512:$src2, VR512:$src3)>;
  def : Pat<(v16i32 (add VR512:$src1,
                         (X86vpmaddwd_su VR512:$src2, (load addr:$src3)))),
            (VPDPWSSDZm VR512:$src1, VR512:$src2, addr:$src3)>;
}

// 256-bit forms.
let Predicates = [HasVNNI, HasVLX] in {
  def : Pat<(v8i32 (add VR256X:$src1,
                        (X86vpmaddwd_su VR256X:$src2, VR256X:$src3))),
            (VPDPWSSDZ256r VR256X:$src1, VR256X:$src2, VR256X:$src3)>;
  def : Pat<(v8i32 (add VR256X:$src1,
                        (X86vpmaddwd_su VR256X:$src2, (load addr:$src3)))),
            (VPDPWSSDZ256m VR256X:$src1, VR256X:$src2, addr:$src3)>;
}

// 128-bit forms.
let Predicates = [HasVNNI, HasVLX] in {
  def : Pat<(v4i32 (add VR128X:$src1,
                        (X86vpmaddwd_su VR128X:$src2, VR128X:$src3))),
            (VPDPWSSDZ128r VR128X:$src1, VR128X:$src2, VR128X:$src3)>;
  def : Pat<(v4i32 (add VR128X:$src1,
                        (X86vpmaddwd_su VR128X:$src2, (load addr:$src3)))),
            (VPDPWSSDZ128m VR128X:$src1, VR128X:$src2, addr:$src3)>;
}

//===----------------------------------------------------------------------===//
// Bit Algorithms
//===----------------------------------------------------------------------===//

// FIXME: Is there a better scheduler class for VPOPCNTB/VPOPCNTW?
defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop,
                                   SchedWriteVecALU, avx512vl_i8_info,
                                   HasBITALG>;
defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop,
                                   SchedWriteVecALU, avx512vl_i16_info,
                                   HasBITALG>, VEX_W;

defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>;
defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>;

// X86Vpshufbitqmb restricted to nodes that have exactly one use.
def X86Vpshufbitqmb_su : PatFrag<(ops node:$src1, node:$src2),
                                 (X86Vpshufbitqmb node:$src1, node:$src2), [{
  return N->hasOneUse();
}]>;

// Register/register (rr) and register/memory (rm) forms of vpshufbitqmb. The
// result is written to a mask register (VTI.KRC). Each form supplies the plain
// node and its single-use variant to AVX512_maskable_cmp.
multiclass VPSHUFBITQMB_rm<X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
  defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst),
                                (ins VTI.RC:$src1, VTI.RC:$src2),
                                "vpshufbitqmb",
                                "$src2, $src1", "$src1, $src2",
                                (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
                                                 (VTI.VT VTI.RC:$src2)),
                                (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
                                                    (VTI.VT VTI.RC:$src2))>,
            EVEX_4V, T8PD, Sched<[sched]>;

  defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst),
                                (ins VTI.RC:$src1, VTI.MemOp:$src2),
                                "vpshufbitqmb",
                                "$src2, $src1", "$src1, $src2",
                                (X86Vpshufbitqmb
                                  (VTI.VT VTI.RC:$src1),
                                  (VTI.VT (VTI.LdFrag addr:$src2))),
                                (X86Vpshufbitqmb_su
                                  (VTI.VT VTI.RC:$src1),
                                  (VTI.VT (VTI.LdFrag addr:$src2)))>,
            EVEX_4V, EVEX_CD8<8, CD8VF>, T8PD,
            Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// Expands VPSHUFBITQMB_rm for each width: 512-bit under HasBITALG, 256/128-bit
// under HasBITALG + HasVLX.
multiclass VPSHUFBITQMB_common<X86SchedWriteWidths sched,
                               AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasBITALG] in {
    defm Z : VPSHUFBITQMB_rm<sched.ZMM, VTI.info512>, EVEX_V512;
  }
  let Predicates = [HasBITALG, HasVLX] in {
    defm Z256 : VPSHUFBITQMB_rm<sched.YMM, VTI.info256>, EVEX_V256;
    defm Z128 : VPSHUFBITQMB_rm<sched.XMM, VTI.info128>, EVEX_V128;
  }
}

// FIXME: Is there a better scheduler class for VPSHUFBITQMB?
defm VPSHUFBITQMB : VPSHUFBITQMB_common<SchedWriteVecIMul, avx512vl_i8_info>;

//===----------------------------------------------------------------------===//
// GFNI
//===----------------------------------------------------------------------===//

// Width expansion of avx512_binop_rm over the i8 vector infos. The trailing
// "1" is passed through to avx512_binop_rm for every width.
multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
                                   X86SchedWriteWidths sched> {
  let Predicates = [HasGFNI, HasAVX512, HasBWI] in {
    defm Z : avx512_binop_rm<Op, OpStr, OpNode, v64i8_info, sched.ZMM, 1>,
             EVEX_V512;
  }
  let Predicates = [HasGFNI, HasVLX, HasBWI] in {
    defm Z256 : avx512_binop_rm<Op, OpStr, OpNode, v32i8x_info, sched.YMM, 1>,
                EVEX_V256;
    defm Z128 : avx512_binop_rm<Op, OpStr, OpNode, v16i8x_info, sched.XMM, 1>,
                EVEX_V128;
  }
}

defm VGF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb,
                                          SchedWriteVecALU>,
                  EVEX_CD8<8, CD8VF>, T8PD;

// Inherits the forms from avx512_3Op_rm_imm8 and adds an embedded-broadcast
// memory form (rmbi). The broadcast is a 64-bit broadcast load typed as
// BcstVTI.VT and bitconverted for the byte-vector operation.
multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
                                      X86FoldableSchedWrite sched,
                                      X86VectorVTInfo VTI,
                                      X86VectorVTInfo BcstVTI>
           : avx512_3Op_rm_imm8<Op, OpStr, OpNode, sched, VTI, VTI> {
  let ExeDomain = VTI.ExeDomain in {
    defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                  (ins VTI.RC:$src1, VTI.ScalarMemOp:$src2, u8imm:$src3),
                  OpStr,
                  !strconcat("$src3, ${src2}", BcstVTI.BroadcastStr,
                             ", $src1"),
                  !strconcat("$src1, ${src2}", BcstVTI.BroadcastStr,
                             ", $src3"),
                  (OpNode (VTI.VT VTI.RC:$src1),
                   (bitconvert (BcstVTI.VT (X86VBroadcastld64 addr:$src2))),
                   (i8 timm:$src3))>,
                EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Width expansion of GF2P8AFFINE_avx512_rmb_imm; each i8 vector info is paired
// with the i64 vector info of the same width for the broadcast form.
multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
                                     X86SchedWriteWidths sched> {
  let Predicates = [HasGFNI, HasAVX512, HasBWI] in {
    defm Z : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.ZMM,
                                        v64i8_info, v8i64_info>, EVEX_V512;
  }
  let Predicates = [HasGFNI, HasVLX, HasBWI] in {
    defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.YMM,
                                           v32i8x_info, v4i64x_info>,
                EVEX_V256;
    defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.XMM,
                                           v16i8x_info, v2i64x_info>,
                EVEX_V128;
  }
}

defm VGF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb",
                                                   X86GF2P8affineinvqb,
                                                   SchedWriteVecIMul>,
                         EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
defm VGF2P8AFFINEQB    : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb",
                                                   X86GF2P8affineqb,
                                                   SchedWriteVecIMul>,
                         EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;


//===----------------------------------------------------------------------===//
// AVX5124FMAPS
//===----------------------------------------------------------------------===//

// Memory-source only, assembler-only definitions (empty pattern lists).
let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedSingle,
    Constraints = "$src1 = $dst", Uses = [MXCSR], mayRaiseFPException = 1 in {
defm V4FMADDPSrm : AVX512_maskable_3src_in_asm<0x9A, MRMSrcMem, v16f32_info,
                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                     "v4fmaddps", "$src3, $src2", "$src2, $src3",
                     []>,
                   EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                   Sched<[SchedWriteFMA.ZMM.Folded]>;

defm V4FNMADDPSrm : AVX512_maskable_3src_in_asm<0xAA, MRMSrcMem, v16f32_info,
                      (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                      "v4fnmaddps", "$src3, $src2", "$src2, $src3",
                      []>,
                    EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                    Sched<[SchedWriteFMA.ZMM.Folded]>;

defm V4FMADDSSrm : AVX512_maskable_3src_in_asm<0x9B, MRMSrcMem, f32x_info,
                     (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
                     "v4fmaddss", "$src3, $src2", "$src2, $src3",
                     []>,
                   VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
                   Sched<[SchedWriteFMA.Scl.Folded]>;

defm V4FNMADDSSrm : AVX512_maskable_3src_in_asm<0xAB, MRMSrcMem, f32x_info,
                      (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
                      "v4fnmaddss", "$src3, $src2", "$src2, $src3",
                      []>,
                    VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
                    Sched<[SchedWriteFMA.Scl.Folded]>;
}

//===----------------------------------------------------------------------===//
// AVX5124VNNIW
//===----------------------------------------------------------------------===//

// Memory-source only, assembler-only definitions (empty pattern lists).
let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedInt,
    Constraints = "$src1 = $dst" in {
defm VP4DPWSSDrm : AVX512_maskable_3src_in_asm<0x52, MRMSrcMem, v16i32_info,
                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                     "vp4dpwssd", "$src3, $src2", "$src2, $src3",
                     []>,
                   EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                   Sched<[SchedWriteFMA.ZMM.Folded]>;

defm VP4DPWSSDSrm : AVX512_maskable_3src_in_asm<0x53, MRMSrcMem, v16i32_info,
                      (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                      "vp4dpwssds", "$src3, $src2", "$src2, $src3",
                      []>,
                    EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                    Sched<[SchedWriteFMA.ZMM.Folded]>;
}

// Pseudo instructions that store/load a VK16PAIR mask-register pair.
let hasSideEffects = 0, mayStore = 1, SchedRW = [WriteFStoreX] in
def MASKPAIR16STORE : PseudoI<(outs), (ins anymem:$dst, VK16PAIR:$src), []>;
let hasSideEffects = 0, mayLoad = 1, SchedRW = [WriteFLoadX] in
def MASKPAIR16LOAD : PseudoI<(outs VK16PAIR:$dst), (ins anymem:$src), []>;

//===----------------------------------------------------------------------===//
// VP2INTERSECT
//===----------------------------------------------------------------------===//

// Register (rr), full-vector memory (rm) and embedded-broadcast memory (rmb)
// forms. The destination is a mask-register pair (_.KRPC).
multiclass avx512_vp2intersect_modes<X86FoldableSchedWrite sched,
                                     X86VectorVTInfo _> {
  def rr : I<0x68, MRMSrcReg,
             (outs _.KRPC:$dst),
             (ins _.RC:$src1, _.RC:$src2),
             "vp2intersect" # _.Suffix #
               "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
             [(set _.KRPC:$dst, (X86vp2intersect
                       _.RC:$src1, (_.VT _.RC:$src2)))]>,
           EVEX_4V, T8XD, Sched<[sched]>;

  def rm : I<0x68, MRMSrcMem,
             (outs _.KRPC:$dst),
             (ins _.RC:$src1, _.MemOp:$src2),
             "vp2intersect" # _.Suffix #
               "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
             [(set _.KRPC:$dst, (X86vp2intersect
                       _.RC:$src1,
                       (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>,
           EVEX_4V, T8XD, EVEX_CD8<_.EltSize, CD8VF>,
           Sched<[sched.Folded, sched.ReadAfterFold]>;

  def rmb : I<0x68, MRMSrcMem,
              (outs _.KRPC:$dst),
              (ins _.RC:$src1, _.ScalarMemOp:$src2),
              "vp2intersect" # _.Suffix # "\t{${src2}" # _.BroadcastStr #
                ", $src1, $dst|$dst, $src1, ${src2}" # _.BroadcastStr # "}",
              [(set _.KRPC:$dst, (X86vp2intersect
                        _.RC:$src1,
                        (_.VT (_.BroadcastLdFrag addr:$src2))))]>,
            EVEX_4V, T8XD, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
            Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// Width expansion of avx512_vp2intersect_modes; the 256/128-bit forms
// additionally require HasVLX.
multiclass avx512_vp2intersect<X86SchedWriteWidths sched,
                               AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512, HasVP2INTERSECT] in {
    defm Z : avx512_vp2intersect_modes<sched.ZMM, _.info512>, EVEX_V512;
  }

  let Predicates = [HasAVX512, HasVP2INTERSECT, HasVLX] in {
    defm Z256 : avx512_vp2intersect_modes<sched.YMM, _.info256>, EVEX_V256;
    defm Z128 : avx512_vp2intersect_modes<sched.XMM, _.info128>, EVEX_V128;
  }
}

defm VP2INTERSECTD : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i32_info>;
defm VP2INTERSECTQ : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i64_info>,
                     VEX_W;

// Width expansion of avx512_binop_rm2 with distinct source and destination
// vector infos. The source info is passed twice (third and fifth info slot).
// All widths use EVEX_CD8<32, CD8VF>.
multiclass avx512_binop_all2<bits<8> opc, string OpcodeStr,
                             X86SchedWriteWidths sched,
                             AVX512VLVectorVTInfo _SrcVTInfo,
                             AVX512VLVectorVTInfo _DstVTInfo,
                             SDNode OpNode, Predicate prd,
                             bit IsCommutable = 0> {
  let Predicates = [prd] in {
    defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
                                   _SrcVTInfo.info512, _DstVTInfo.info512,
                                   _SrcVTInfo.info512, IsCommutable>,
                  EVEX_V512, EVEX_CD8<32, CD8VF>;
  }
  let Predicates = [HasVLX, prd] in {
    defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
                                      _SrcVTInfo.info256, _DstVTInfo.info256,
                                      _SrcVTInfo.info256, IsCommutable>,
                     EVEX_V256, EVEX_CD8<32, CD8VF>;
    defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
                                      _SrcVTInfo.info128, _DstVTInfo.info128,
                                      _SrcVTInfo.info128, IsCommutable>,
                     EVEX_V128, EVEX_CD8<32, CD8VF>;
  }
}

let ExeDomain = SSEPackedSingle in {
defm VCVTNE2PS2BF16 : avx512_binop_all2<0x72, "vcvtne2ps2bf16",
                        SchedWriteCvtPD2PS, //FIXME: Should be SchedWriteCvtPS2BF
                        avx512vl_f32_info, avx512vl_i16_info,
                        X86cvtne2ps2bf16, HasBF16, 0>, T8XD;
}

// Truncate Float to BFloat16
// All widths clear Uses and mayRaiseFPException. The 128-bit form is built
// with null_frag (its selection patterns are written out separately) and
// names f128mem / VK4WM explicitly.
multiclass avx512_cvtps2bf16<bits<8> opc, string OpcodeStr,
                             X86SchedWriteWidths sched> {
  let ExeDomain = SSEPackedSingle, Uses = []<Register>,
      mayRaiseFPException = 0 in {
    let Predicates = [HasBF16] in {
      defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i16x_info, v16f32_info,
                              X86cvtneps2bf16, X86cvtneps2bf16, sched.ZMM>,
               EVEX_V512;
    }
    let Predicates = [HasBF16, HasVLX] in {
      defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v4f32x_info,
                                 null_frag, null_frag, sched.XMM, "{1to4}",
                                 "{x}", f128mem, VK4WM>,
                  EVEX_V128;
      defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v8f32x_info,
                                 X86cvtneps2bf16, X86cvtneps2bf16,
                                 sched.YMM, "{1to8}", "{y}">,
                  EVEX_V256;
    } // Predicates = [HasBF16, HasVLX]
  } // ExeDomain = SSEPackedSingle, Uses, mayRaiseFPException

  // Aliases carrying an explicit "x"/"y" mnemonic suffix; the memory forms
  // are registered for the "intel" variant.
  def : InstAlias<!strconcat(OpcodeStr, "x\t{$src, $dst|$dst, $src}"),
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
                  VR128X:$src), 0>;
  def : InstAlias<!strconcat(OpcodeStr, "x\t{$src, $dst|$dst, $src}"),
                  (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst,
                  f128mem:$src), 0, "intel">;
  def : InstAlias<!strconcat(OpcodeStr, "y\t{$src, $dst|$dst, $src}"),
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
                  VR256X:$src), 0>;
  def : InstAlias<!strconcat(OpcodeStr, "y\t{$src, $dst|$dst, $src}"),
                  (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst,
                  f256mem:$src), 0, "intel">;
}

defm VCVTNEPS2BF16 : avx512_cvtps2bf16<0x72, "vcvtneps2bf16",
                                       SchedWriteCvtPD2PS>,
                     T8XS, EVEX_CD8<32, CD8VF>;

let Predicates = [HasBF16, HasVLX] in {
  // Special patterns to allow use of X86mcvtneps2bf16 for masking. Instruction
  // patterns have been disabled with null_frag.

  // Register source.
  def : Pat<(v8i16 (X86cvtneps2bf16 (v4f32 VR128X:$src))),
            (VCVTNEPS2BF16Z128rr VR128X:$src)>;
  def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), (v8i16 VR128X:$src0),
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src)>;
  def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), v8i16x_info.ImmAllZerosV,
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rrkz VK4WM:$mask, VR128X:$src)>;

  // Full-vector load source.
  def : Pat<(v8i16 (X86cvtneps2bf16 (loadv4f32 addr:$src))),
            (VCVTNEPS2BF16Z128rm addr:$src)>;
  def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), (v8i16 VR128X:$src0),
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), v8i16x_info.ImmAllZerosV,
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rmkz VK4WM:$mask, addr:$src)>;

  // Broadcast load source.
  def : Pat<(v8i16 (X86cvtneps2bf16 (v4f32
                                     (X86VBroadcastld32 addr:$src)))),
            (VCVTNEPS2BF16Z128rmb addr:$src)>;
  def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
                              (v8i16 VR128X:$src0), VK4WM:$mask),
            (VCVTNEPS2BF16Z128rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
                              v8i16x_info.ImmAllZerosV, VK4WM:$mask),
            (VCVTNEPS2BF16Z128rmbkz VK4WM:$mask, addr:$src)>;
}

// Three-source forms with $src1 tied to $dst. The accumulator/destination uses
// the "_" info while $src2/$src3 use src_v. The broadcast suffix in the asm
// string is taken from "_".
let Constraints = "$src1 = $dst" in {
multiclass avx512_dpbf16ps_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86FoldableSchedWrite sched,
                              X86VectorVTInfo _, X86VectorVTInfo src_v> {
  defm r : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                (ins src_v.RC:$src2, src_v.RC:$src3),
                                OpcodeStr, "$src3, $src2", "$src2, $src3",
                                (_.VT (OpNode _.RC:$src1, src_v.RC:$src2,
                                       src_v.RC:$src3))>,
           EVEX_4V, Sched<[sched]>;

  defm m : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                                (ins src_v.RC:$src2, src_v.MemOp:$src3),
                                OpcodeStr, "$src3, $src2", "$src2, $src3",
                                (_.VT (OpNode _.RC:$src1, src_v.RC:$src2,
                                       (src_v.LdFrag addr:$src3)))>,
           EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm mb : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                                 (ins src_v.RC:$src2, src_v.ScalarMemOp:$src3),
                                 OpcodeStr,
                                 "${src3}" # _.BroadcastStr # ", $src2",
                                 "$src2, ${src3}" # _.BroadcastStr,
                                 (_.VT (OpNode _.RC:$src1, src_v.RC:$src2,
                                        (src_v.VT (src_v.BroadcastLdFrag
                                                   addr:$src3))))>,
            EVEX_B, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;

}
} // Constraints = "$src1 = $dst"

// Width expansion of avx512_dpbf16ps_rm; the 256/128-bit forms additionally
// require HasVLX.
multiclass avx512_dpbf16ps_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86SchedWriteWidths sched,
                                 AVX512VLVectorVTInfo _,
                                 AVX512VLVectorVTInfo src_v, Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512,
                              src_v.info512>, EVEX_V512;

  let Predicates = [HasVLX, prd] in {
    defm Z256 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.YMM,
                                   _.info256, src_v.info256>, EVEX_V256;
    defm Z128 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.XMM,
                                   _.info128, src_v.info128>, EVEX_V128;
  }
}

let ExeDomain = SSEPackedSingle in {
defm VDPBF16PS : avx512_dpbf16ps_sizes<0x52, "vdpbf16ps", X86dpbf16ps,
                                       SchedWriteFMA,
                                       avx512vl_f32_info, avx512vl_i32_info,
                                       HasBF16>, T8XS, EVEX_CD8<32, CD8VF>;
}

//===----------------------------------------------------------------------===//
// AVX512FP16
//===----------------------------------------------------------------------===//

let Predicates = [HasFP16] in {
// Move word (r/m16) to packed word.
def VMOVW2SHrr : AVX512<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
                        "vmovw\t{$src, $dst|$dst, $src}", []>,
                 T_MAP5PD, EVEX, Sched<[WriteVecMoveFromGpr]>;
def VMOVWrm : AVX512<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i16mem:$src),
                     "vmovw\t{$src, $dst|$dst, $src}",
                     [(set VR128X:$dst,
                       (v8i16 (scalar_to_vector (loadi16 addr:$src))))]>,
              T_MAP5PD, EVEX, EVEX_CD8<16, CD8VT1>, Sched<[WriteFLoad]>;

// GR16 sources are first placed into an undefined 32-bit register with
// INSERT_SUBREG, since VMOVW2SHrr takes a GR32 operand.
def : Pat<(f16 (bitconvert GR16:$src)),
          (f16 (COPY_TO_REGCLASS
                (VMOVW2SHrr
                 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)),
                FR16X))>;
def : Pat<(v8i16 (scalar_to_vector (i16 GR16:$src))),
          (VMOVW2SHrr (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit))>;
def : Pat<(v4i32 (X86vzmovl (scalar_to_vector (and GR32:$src, 0xffff)))),
          (VMOVW2SHrr GR32:$src)>;
// FIXME: We should really find a way to improve these patterns.
def : Pat<(v8i32 (X86vzmovl
                  (insert_subvector undef,
                                    (v4i32 (scalar_to_vector
                                            (and GR32:$src, 0xffff))),
                                    (iPTR 0)))),
          (SUBREG_TO_REG (i32 0), (VMOVW2SHrr GR32:$src), sub_xmm)>;
def : Pat<(v16i32 (X86vzmovl
                   (insert_subvector undef,
                                     (v4i32 (scalar_to_vector
                                             (and GR32:$src, 0xffff))),
                                     (iPTR 0)))),
          (SUBREG_TO_REG (i32 0), (VMOVW2SHrr GR32:$src), sub_xmm)>;

def : Pat<(v8i16 (X86vzmovl
                  (v8i16 (scalar_to_vector (i16 (trunc GR32:$src)))))),
          (VMOVW2SHrr GR32:$src)>;

// AVX 128-bit movw instruction writes zeros in the high 128-bit part.
def : Pat<(v8i16 (X86vzload16 addr:$src)),
          (VMOVWrm addr:$src)>;
def : Pat<(v16i16 (X86vzload16 addr:$src)),
          (SUBREG_TO_REG (i32 0), (v8i16 (VMOVWrm addr:$src)), sub_xmm)>;

// Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
def : Pat<(v32i16 (X86vzload16 addr:$src)),
          (SUBREG_TO_REG (i32 0), (v8i16 (VMOVWrm addr:$src)), sub_xmm)>;

def : Pat<(v4i32 (scalar_to_vector (i32 (extloadi16 addr:$src)))),
          (VMOVWrm addr:$src)>;
def : Pat<(v4i32 (X86vzmovl
                  (scalar_to_vector (i32 (zextloadi16 addr:$src))))),
          (VMOVWrm addr:$src)>;
def : Pat<(v8i32 (X86vzmovl
                  (insert_subvector undef,
                                    (v4i32 (scalar_to_vector
                                            (i32 (zextloadi16 addr:$src)))),
                                    (iPTR 0)))),
          (SUBREG_TO_REG (i32 0), (VMOVWrm addr:$src), sub_xmm)>;
def : Pat<(v16i32 (X86vzmovl
                   (insert_subvector undef,
                                     (v4i32 (scalar_to_vector
                                             (i32 (zextloadi16 addr:$src)))),
                                     (iPTR 0)))),
          (SUBREG_TO_REG (i32 0), (VMOVWrm addr:$src), sub_xmm)>;

// Move word from xmm register to r/m16.
def VMOVSH2Wrr : AVX512<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
                        "vmovw\t{$src, $dst|$dst, $src}", []>,
                 T_MAP5PD, EVEX, Sched<[WriteVecMoveToGpr]>;
def VMOVWmr : AVX512<0x7E, MRMDestMem, (outs),
                     (ins i16mem:$dst, VR128X:$src),
                     "vmovw\t{$src, $dst|$dst, $src}",
                     [(store (i16 (extractelt (v8i16 VR128X:$src),
                                   (iPTR 0))), addr:$dst)]>,
              T_MAP5PD, EVEX, EVEX_CD8<16, CD8VT1>, Sched<[WriteFStore]>;

// 16-bit results are taken as the sub_16bit part of the GR32 that VMOVSH2Wrr
// defines.
def : Pat<(i16 (bitconvert FR16X:$src)),
          (i16 (EXTRACT_SUBREG
                (VMOVSH2Wrr (COPY_TO_REGCLASS FR16X:$src, VR128X)),
                sub_16bit))>;
def : Pat<(i16 (extractelt (v8i16 VR128X:$src), (iPTR 0))),
          (i16 (EXTRACT_SUBREG (VMOVSH2Wrr VR128X:$src), sub_16bit))>;
}

// Allow "vmovw" to use GR64
let hasSideEffects = 0 in {
  def VMOVW64toSHrr : AVX512<0x6E, MRMSrcReg, (outs VR128X:$dst),
                             (ins GR64:$src),
                             "vmovw\t{$src, $dst|$dst, $src}", []>,
                      T_MAP5PD, EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
  def VMOVSHtoW64rr : AVX512<0x7E, MRMDestReg, (outs GR64:$dst),
                             (ins VR128X:$src),
                             "vmovw\t{$src, $dst|$dst, $src}", []>,
                      T_MAP5PD, EVEX, VEX_W, Sched<[WriteVecMoveToGpr]>;
}

// Conversions between packed f16 and packed 16-bit integers; the direction is
// set by the _Dst/_Src infos passed in. The 512-bit form additionally pulls in
// avx512_vcvt_fp_rc with OpNodeRnd.
multiclass avx512_cvtph2w<bits<8> opc, string OpcodeStr,
                          SDPatternOperator OpNode,
                          SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
                          AVX512VLVectorVTInfo _Dst,
                          AVX512VLVectorVTInfo _Src,
                          X86SchedWriteWidths sched> {
  let Predicates = [HasFP16] in
  defm Z : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info512, _Src.info512,
                          OpNode, MaskOpNode, sched.ZMM>,
           avx512_vcvt_fp_rc<opc, OpcodeStr, _Dst.info512, _Src.info512,
                             OpNodeRnd, sched.ZMM>, EVEX_V512;

  let Predicates = [HasFP16, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info128, _Src.info128,
                               OpNode, MaskOpNode, sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info256, _Src.info256,
                               OpNode, MaskOpNode, sched.YMM>, EVEX_V256;
  }
}

// Convert 16-bit float to i16/u16 with truncation. Same shape as
// avx512_cvtph2w, except that the 512-bit form pulls in avx512_vcvt_fp_sae.
multiclass avx512_cvttph2w<bits<8> opc, string OpcodeStr,
                           SDPatternOperator OpNode,
                           SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
                           AVX512VLVectorVTInfo _Dst,
                           AVX512VLVectorVTInfo _Src,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasFP16] in
  defm Z : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info512, _Src.info512,
                          OpNode, MaskOpNode, sched.ZMM>,
           avx512_vcvt_fp_sae<opc, OpcodeStr, _Dst.info512, _Src.info512,
                              OpNodeRnd, sched.ZMM>, EVEX_V512;

  let Predicates = [HasFP16, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info128, _Src.info128,
                               OpNode, MaskOpNode, sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info256, _Src.info256,
                               OpNode, MaskOpNode, sched.YMM>, EVEX_V256;
  }
}

defm VCVTPH2UW : avx512_cvtph2w<0x7D, "vcvtph2uw", X86cvtp2UInt, X86cvtp2UInt,
                                X86cvtp2UIntRnd, avx512vl_i16_info,
                                avx512vl_f16_info, SchedWriteCvtPD2DQ>,
                 T_MAP5PS, EVEX_CD8<16, CD8VF>;
defm VCVTUW2PH : avx512_cvtph2w<0x7D, "vcvtuw2ph", any_uint_to_fp, uint_to_fp,
                                X86VUintToFpRnd, avx512vl_f16_info,
                                avx512vl_i16_info, SchedWriteCvtPD2DQ>,
                 T_MAP5XD, EVEX_CD8<16, CD8VF>;
defm VCVTTPH2W : avx512_cvttph2w<0x7C, "vcvttph2w", X86any_cvttp2si,
                                 X86cvttp2si, X86cvttp2siSAE,
                                 avx512vl_i16_info, avx512vl_f16_info,
                                 SchedWriteCvtPD2DQ>,
                 T_MAP5PD, EVEX_CD8<16, CD8VF>;
defm VCVTTPH2UW : avx512_cvttph2w<0x7C, "vcvttph2uw", X86any_cvttp2ui,
                                  X86cvttp2ui, X86cvttp2uiSAE,
                                  avx512vl_i16_info, avx512vl_f16_info,
                                  SchedWriteCvtPD2DQ>,
                  T_MAP5PS, EVEX_CD8<16, CD8VF>;
defm VCVTPH2W : avx512_cvtph2w<0x7D, "vcvtph2w", X86cvtp2Int, X86cvtp2Int,
                               X86cvtp2IntRnd, avx512vl_i16_info,
                               avx512vl_f16_info, SchedWriteCvtPD2DQ>,
                T_MAP5PD, EVEX_CD8<16, CD8VF>;
defm VCVTW2PH : avx512_cvtph2w<0x7D, "vcvtw2ph", any_sint_to_fp, sint_to_fp,
                               X86VSintToFpRnd, avx512vl_f16_info,
                               avx512vl_i16_info, SchedWriteCvtPD2DQ>,
                T_MAP5XS, EVEX_CD8<16, CD8VF>;

// Convert Half to Signed/Unsigned Doubleword
// The 128-bit form names its broadcast string and f64mem operand explicitly.
multiclass avx512_cvtph2dq<bits<8> opc, string OpcodeStr,
                           SDPatternOperator OpNode,
                           SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasFP16] in
  defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f16x_info, OpNode,
                          MaskOpNode, sched.ZMM>,
           avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f16x_info,
                             OpNodeRnd, sched.ZMM>, EVEX_V512;

  let Predicates = [HasFP16, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v8f16x_info,
                               OpNode, MaskOpNode, sched.XMM, "{1to4}", "",
                               f64mem>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f16x_info,
                               OpNode, MaskOpNode, sched.YMM>, EVEX_V256;
  }
}

// Convert Half to Signed/Unsigned Doubleword with truncation
multiclass avx512_cvttph2dq<bits<8> opc, string OpcodeStr,
                            SDPatternOperator OpNode,
                            SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
                            X86SchedWriteWidths sched> {
  let Predicates = [HasFP16] in
  defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f16x_info, OpNode,
                          MaskOpNode, sched.ZMM>,
           avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f16x_info,
                              OpNodeRnd, sched.ZMM>, EVEX_V512;

  let Predicates = [HasFP16, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v8f16x_info,
                               OpNode, MaskOpNode, sched.XMM, "{1to4}", "",
                               f64mem>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f16x_info,
                               OpNode, MaskOpNode, sched.YMM>, EVEX_V256;
  }
}


defm VCVTPH2DQ : avx512_cvtph2dq<0x5B, "vcvtph2dq", X86cvtp2Int, X86cvtp2Int,
                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>,
                 T_MAP5PD, EVEX_CD8<16, CD8VH>;
defm VCVTPH2UDQ : avx512_cvtph2dq<0x79, "vcvtph2udq", X86cvtp2UInt,
                                  X86cvtp2UInt, X86cvtp2UIntRnd,
                                  SchedWriteCvtPS2DQ>,
                  T_MAP5PS, EVEX_CD8<16, CD8VH>;

defm VCVTTPH2DQ : avx512_cvttph2dq<0x5B, "vcvttph2dq", X86any_cvttp2si,
                                   X86cvttp2si, X86cvttp2siSAE,
                                   SchedWriteCvtPS2DQ>,
                  T_MAP5XS, EVEX_CD8<16, CD8VH>;

defm VCVTTPH2UDQ : avx512_cvttph2dq<0x78, "vcvttph2udq", X86any_cvttp2ui,
                                    X86cvttp2ui, X86cvttp2uiSAE,
                                    SchedWriteCvtPS2DQ>,
                   T_MAP5PS, EVEX_CD8<16, CD8VH>;

// Convert Half to Signed/Unsigned Quadword
multiclass avx512_cvtph2qq<bits<8> opc, string OpcodeStr,
                           SDPatternOperator OpNode,
                           SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasFP16] in
  defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f16x_info, OpNode,
                          MaskOpNode, sched.ZMM>,
           avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f16x_info,
                             OpNodeRnd, sched.ZMM>, EVEX_V512;

  let Predicates = [HasFP16, HasVLX] in {
    // Explicitly specified broadcast string, since we take only 2 elements
    // from v8f16x_info source
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v8f16x_info,
                               OpNode, MaskOpNode, sched.XMM, "{1to2}", "",
                               f32mem>,
                EVEX_V128;
    // Explicitly specified broadcast string, since we take only 4 elements
    // from v8f16x_info source
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v8f16x_info,
                               OpNode, MaskOpNode, sched.YMM, "{1to4}", "",
                               f64mem>,
                EVEX_V256;
  }
}

// Convert Half to Signed/Unsigned Quadword with truncation
multiclass avx512_cvttph2qq<bits<8> opc, string OpcodeStr,
                            SDPatternOperator OpNode,
                            SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
                            X86SchedWriteWidths sched> {
  let Predicates = [HasFP16] in
  defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f16x_info, OpNode,
                          MaskOpNode, sched.ZMM>,
           avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f16x_info,
                              OpNodeRnd, sched.ZMM>, EVEX_V512;

  let Predicates = [HasFP16, HasVLX] in {
    // Explicitly specified broadcast string, since we take only 2 elements
    // from v8f16x_info source
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v8f16x_info,
                               OpNode, MaskOpNode, sched.XMM, "{1to2}", "",
                               f32mem>, EVEX_V128;
    // Explicitly specified broadcast string, since we take only 4 elements
    // from v8f16x_info source
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v8f16x_info,
                               OpNode, MaskOpNode, sched.YMM, "{1to4}", "",
                               f64mem>, EVEX_V256;
  }
}

defm VCVTPH2QQ : avx512_cvtph2qq<0x7B, "vcvtph2qq", X86cvtp2Int, X86cvtp2Int,
                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>,
                 T_MAP5PD, EVEX_CD8<16, CD8VQ>;

defm VCVTPH2UQQ : avx512_cvtph2qq<0x79, "vcvtph2uqq", X86cvtp2UInt,
                                  X86cvtp2UInt, X86cvtp2UIntRnd,
                                  SchedWriteCvtPS2DQ>,
                  T_MAP5PD, EVEX_CD8<16, CD8VQ>;

defm VCVTTPH2QQ : avx512_cvttph2qq<0x7A, "vcvttph2qq", X86any_cvttp2si,
                                   X86cvttp2si, X86cvttp2siSAE,
                                   SchedWriteCvtPS2DQ>,
                  T_MAP5PD, EVEX_CD8<16, CD8VQ>;

defm VCVTTPH2UQQ : avx512_cvttph2qq<0x78, "vcvttph2uqq", X86any_cvttp2ui,
                                    X86cvttp2ui, X86cvttp2uiSAE,
                                    SchedWriteCvtPS2DQ>,
                   T_MAP5PD, EVEX_CD8<16, CD8VQ>;

// Convert Signed/Unsigned Quadword to Half
multiclass avx512_cvtqq2ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                           SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  // We need "x"/"y"/"z" suffixes in order to distinguish between the 128, 256
  // and 512 memory forms of these instructions in the asm parser. They have
  // the same dest type - 'v8f16x_info'. We also specify the broadcast string
  // explicitly for the same reason.
  let Predicates = [HasFP16] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v8i64_info, OpNode,
                            MaskOpNode, sched.ZMM, "{1to8}", "{z}">,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f16x_info, v8i64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasFP16, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v2i64x_info,
                               null_frag, null_frag, sched.XMM, "{1to2}", "{x}",
                               i128mem, VK2WM>,
                               EVEX_V128, NotEVEX2VEXConvertible;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v4i64x_info,
                               null_frag, null_frag, sched.YMM, "{1to4}", "{y}",
                               i256mem, VK4WM>,
                               EVEX_V256, NotEVEX2VEXConvertible;
  }

  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
                  VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
                  i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
                  VK2WM:$mask, i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                  VK2WM:$mask, i64mem:$src), 0, "att">;

  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
                  VR256X:$src), 0, "att">;
13227 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|" 13228 "$dst {${mask}}, $src}", 13229 (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst, 13230 VK4WM:$mask, VR256X:$src), 0, "att">; 13231 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|" 13232 "$dst {${mask}} {z}, $src}", 13233 (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst, 13234 VK4WM:$mask, VR256X:$src), 0, "att">; 13235 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}", 13236 (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, 13237 i64mem:$src), 0, "att">; 13238 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|" 13239 "$dst {${mask}}, ${src}{1to4}}", 13240 (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst, 13241 VK4WM:$mask, i64mem:$src), 0, "att">; 13242 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|" 13243 "$dst {${mask}} {z}, ${src}{1to4}}", 13244 (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst, 13245 VK4WM:$mask, i64mem:$src), 0, "att">; 13246 13247 def : InstAlias<OpcodeStr#"z\t{$src, $dst|$dst, $src}", 13248 (!cast<Instruction>(NAME # "Zrr") VR128X:$dst, 13249 VR512:$src), 0, "att">; 13250 def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}}|" 13251 "$dst {${mask}}, $src}", 13252 (!cast<Instruction>(NAME # "Zrrk") VR128X:$dst, 13253 VK8WM:$mask, VR512:$src), 0, "att">; 13254 def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}} {z}|" 13255 "$dst {${mask}} {z}, $src}", 13256 (!cast<Instruction>(NAME # "Zrrkz") VR128X:$dst, 13257 VK8WM:$mask, VR512:$src), 0, "att">; 13258 def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst|$dst, ${src}{1to8}}", 13259 (!cast<Instruction>(NAME # "Zrmb") VR128X:$dst, 13260 i64mem:$src), 0, "att">; 13261 def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}}|" 13262 "$dst {${mask}}, ${src}{1to8}}", 13263 (!cast<Instruction>(NAME # "Zrmbk") VR128X:$dst, 13264 VK8WM:$mask, i64mem:$src), 0, "att">; 13265 def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}} {z}|" 13266 "$dst {${mask}} {z}, 
${src}{1to8}}", 13267 (!cast<Instruction>(NAME # "Zrmbkz") VR128X:$dst, 13268 VK8WM:$mask, i64mem:$src), 0, "att">; 13269} 13270 13271defm VCVTQQ2PH : avx512_cvtqq2ph<0x5B, "vcvtqq2ph", any_sint_to_fp, sint_to_fp, 13272 X86VSintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, T_MAP5PS, 13273 EVEX_CD8<64, CD8VF>; 13274 13275defm VCVTUQQ2PH : avx512_cvtqq2ph<0x7A, "vcvtuqq2ph", any_uint_to_fp, uint_to_fp, 13276 X86VUintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, T_MAP5XD, 13277 EVEX_CD8<64, CD8VF>; 13278 13279// Convert half to signed/unsigned int 32/64 13280defm VCVTSH2SIZ: avx512_cvt_s_int_round<0x2D, f16x_info, i32x_info, X86cvts2si, 13281 X86cvts2siRnd, WriteCvtSS2I, "cvtsh2si", "{l}", HasFP16>, 13282 T_MAP5XS, EVEX_CD8<16, CD8VT1>; 13283defm VCVTSH2SI64Z: avx512_cvt_s_int_round<0x2D, f16x_info, i64x_info, X86cvts2si, 13284 X86cvts2siRnd, WriteCvtSS2I, "cvtsh2si", "{q}", HasFP16>, 13285 T_MAP5XS, VEX_W, EVEX_CD8<16, CD8VT1>; 13286defm VCVTSH2USIZ: avx512_cvt_s_int_round<0x79, f16x_info, i32x_info, X86cvts2usi, 13287 X86cvts2usiRnd, WriteCvtSS2I, "cvtsh2usi", "{l}", HasFP16>, 13288 T_MAP5XS, EVEX_CD8<16, CD8VT1>; 13289defm VCVTSH2USI64Z: avx512_cvt_s_int_round<0x79, f16x_info, i64x_info, X86cvts2usi, 13290 X86cvts2usiRnd, WriteCvtSS2I, "cvtsh2usi", "{q}", HasFP16>, 13291 T_MAP5XS, VEX_W, EVEX_CD8<16, CD8VT1>; 13292 13293defm VCVTTSH2SIZ: avx512_cvt_s_all<0x2C, "vcvttsh2si", f16x_info, i32x_info, 13294 any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I, 13295 "{l}", HasFP16>, T_MAP5XS, EVEX_CD8<16, CD8VT1>; 13296defm VCVTTSH2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsh2si", f16x_info, i64x_info, 13297 any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I, 13298 "{q}", HasFP16>, VEX_W, T_MAP5XS, EVEX_CD8<16, CD8VT1>; 13299defm VCVTTSH2USIZ: avx512_cvt_s_all<0x78, "vcvttsh2usi", f16x_info, i32x_info, 13300 any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I, 13301 "{l}", HasFP16>, T_MAP5XS, EVEX_CD8<16, CD8VT1>; 13302defm VCVTTSH2USI64Z: avx512_cvt_s_all<0x78, 
"vcvttsh2usi", f16x_info, i64x_info, 13303 any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I, 13304 "{q}", HasFP16>, T_MAP5XS, VEX_W, EVEX_CD8<16, CD8VT1>; 13305 13306let Predicates = [HasFP16] in { 13307 defm VCVTSI2SHZ : avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd, WriteCvtI2SS, GR32, 13308 v8f16x_info, i32mem, loadi32, "cvtsi2sh", "l">, 13309 T_MAP5XS, EVEX_CD8<32, CD8VT1>; 13310 defm VCVTSI642SHZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd, WriteCvtI2SS, GR64, 13311 v8f16x_info, i64mem, loadi64, "cvtsi2sh","q">, 13312 T_MAP5XS, VEX_W, EVEX_CD8<64, CD8VT1>; 13313 defm VCVTUSI2SHZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd, WriteCvtI2SS, GR32, 13314 v8f16x_info, i32mem, loadi32, 13315 "cvtusi2sh","l">, T_MAP5XS, EVEX_CD8<32, CD8VT1>; 13316 defm VCVTUSI642SHZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd, WriteCvtI2SS, GR64, 13317 v8f16x_info, i64mem, loadi64, "cvtusi2sh", "q">, 13318 T_MAP5XS, VEX_W, EVEX_CD8<64, CD8VT1>; 13319 def : InstAlias<"vcvtsi2sh\t{$src, $src1, $dst|$dst, $src1, $src}", 13320 (VCVTSI2SHZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">; 13321 13322 def : InstAlias<"vcvtusi2sh\t{$src, $src1, $dst|$dst, $src1, $src}", 13323 (VCVTUSI2SHZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">; 13324 13325 13326 def : Pat<(f16 (any_sint_to_fp (loadi32 addr:$src))), 13327 (VCVTSI2SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>; 13328 def : Pat<(f16 (any_sint_to_fp (loadi64 addr:$src))), 13329 (VCVTSI642SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>; 13330 13331 def : Pat<(f16 (any_sint_to_fp GR32:$src)), 13332 (VCVTSI2SHZrr (f16 (IMPLICIT_DEF)), GR32:$src)>; 13333 def : Pat<(f16 (any_sint_to_fp GR64:$src)), 13334 (VCVTSI642SHZrr (f16 (IMPLICIT_DEF)), GR64:$src)>; 13335 13336 def : Pat<(f16 (any_uint_to_fp (loadi32 addr:$src))), 13337 (VCVTUSI2SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>; 13338 def : Pat<(f16 (any_uint_to_fp (loadi64 addr:$src))), 13339 (VCVTUSI642SHZrm (f16 (IMPLICIT_DEF)), 
addr:$src)>; 13340 13341 def : Pat<(f16 (any_uint_to_fp GR32:$src)), 13342 (VCVTUSI2SHZrr (f16 (IMPLICIT_DEF)), GR32:$src)>; 13343 def : Pat<(f16 (any_uint_to_fp GR64:$src)), 13344 (VCVTUSI642SHZrr (f16 (IMPLICIT_DEF)), GR64:$src)>; 13345 13346 // Patterns used for matching vcvtsi2sh intrinsic sequences from clang 13347 // which produce unnecessary vmovsh instructions 13348 def : Pat<(v8f16 (X86Movsh 13349 (v8f16 VR128X:$dst), 13350 (v8f16 (scalar_to_vector (f16 (any_sint_to_fp GR64:$src)))))), 13351 (VCVTSI642SHZrr_Int VR128X:$dst, GR64:$src)>; 13352 13353 def : Pat<(v8f16 (X86Movsh 13354 (v8f16 VR128X:$dst), 13355 (v8f16 (scalar_to_vector (f16 (any_sint_to_fp (loadi64 addr:$src))))))), 13356 (VCVTSI642SHZrm_Int VR128X:$dst, addr:$src)>; 13357 13358 def : Pat<(v8f16 (X86Movsh 13359 (v8f16 VR128X:$dst), 13360 (v8f16 (scalar_to_vector (f16 (any_sint_to_fp GR32:$src)))))), 13361 (VCVTSI2SHZrr_Int VR128X:$dst, GR32:$src)>; 13362 13363 def : Pat<(v8f16 (X86Movsh 13364 (v8f16 VR128X:$dst), 13365 (v8f16 (scalar_to_vector (f16 (any_sint_to_fp (loadi32 addr:$src))))))), 13366 (VCVTSI2SHZrm_Int VR128X:$dst, addr:$src)>; 13367 13368 def : Pat<(v8f16 (X86Movsh 13369 (v8f16 VR128X:$dst), 13370 (v8f16 (scalar_to_vector (f16 (any_uint_to_fp GR64:$src)))))), 13371 (VCVTUSI642SHZrr_Int VR128X:$dst, GR64:$src)>; 13372 13373 def : Pat<(v8f16 (X86Movsh 13374 (v8f16 VR128X:$dst), 13375 (v8f16 (scalar_to_vector (f16 (any_uint_to_fp (loadi64 addr:$src))))))), 13376 (VCVTUSI642SHZrm_Int VR128X:$dst, addr:$src)>; 13377 13378 def : Pat<(v8f16 (X86Movsh 13379 (v8f16 VR128X:$dst), 13380 (v8f16 (scalar_to_vector (f16 (any_uint_to_fp GR32:$src)))))), 13381 (VCVTUSI2SHZrr_Int VR128X:$dst, GR32:$src)>; 13382 13383 def : Pat<(v8f16 (X86Movsh 13384 (v8f16 VR128X:$dst), 13385 (v8f16 (scalar_to_vector (f16 (any_uint_to_fp (loadi32 addr:$src))))))), 13386 (VCVTUSI2SHZrm_Int VR128X:$dst, addr:$src)>; 13387} // Predicates = [HasFP16] 13388 13389let Predicates = [HasFP16, HasVLX] in { 13390 // Special 
patterns to allow use of X86VMSintToFP for masking. Instruction 13391 // patterns have been disabled with null_frag. 13392 def : Pat<(v8f16 (X86any_VSintToFP (v4i64 VR256X:$src))), 13393 (VCVTQQ2PHZ256rr VR256X:$src)>; 13394 def : Pat<(X86VMSintToFP (v4i64 VR256X:$src), (v8f16 VR128X:$src0), 13395 VK4WM:$mask), 13396 (VCVTQQ2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>; 13397 def : Pat<(X86VMSintToFP (v4i64 VR256X:$src), v8f16x_info.ImmAllZerosV, 13398 VK4WM:$mask), 13399 (VCVTQQ2PHZ256rrkz VK4WM:$mask, VR256X:$src)>; 13400 13401 def : Pat<(v8f16 (X86any_VSintToFP (loadv4i64 addr:$src))), 13402 (VCVTQQ2PHZ256rm addr:$src)>; 13403 def : Pat<(X86VMSintToFP (loadv4i64 addr:$src), (v8f16 VR128X:$src0), 13404 VK4WM:$mask), 13405 (VCVTQQ2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>; 13406 def : Pat<(X86VMSintToFP (loadv4i64 addr:$src), v8f16x_info.ImmAllZerosV, 13407 VK4WM:$mask), 13408 (VCVTQQ2PHZ256rmkz VK4WM:$mask, addr:$src)>; 13409 13410 def : Pat<(v8f16 (X86any_VSintToFP (v4i64 (X86VBroadcastld64 addr:$src)))), 13411 (VCVTQQ2PHZ256rmb addr:$src)>; 13412 def : Pat<(X86VMSintToFP (v4i64 (X86VBroadcastld64 addr:$src)), 13413 (v8f16 VR128X:$src0), VK4WM:$mask), 13414 (VCVTQQ2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>; 13415 def : Pat<(X86VMSintToFP (v4i64 (X86VBroadcastld64 addr:$src)), 13416 v8f16x_info.ImmAllZerosV, VK4WM:$mask), 13417 (VCVTQQ2PHZ256rmbkz VK4WM:$mask, addr:$src)>; 13418 13419 def : Pat<(v8f16 (X86any_VSintToFP (v2i64 VR128X:$src))), 13420 (VCVTQQ2PHZ128rr VR128X:$src)>; 13421 def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), (v8f16 VR128X:$src0), 13422 VK2WM:$mask), 13423 (VCVTQQ2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>; 13424 def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), v8f16x_info.ImmAllZerosV, 13425 VK2WM:$mask), 13426 (VCVTQQ2PHZ128rrkz VK2WM:$mask, VR128X:$src)>; 13427 13428 def : Pat<(v8f16 (X86any_VSintToFP (loadv2i64 addr:$src))), 13429 (VCVTQQ2PHZ128rm addr:$src)>; 13430 def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), 
(v8f16 VR128X:$src0), 13431 VK2WM:$mask), 13432 (VCVTQQ2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 13433 def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), v8f16x_info.ImmAllZerosV, 13434 VK2WM:$mask), 13435 (VCVTQQ2PHZ128rmkz VK2WM:$mask, addr:$src)>; 13436 13437 def : Pat<(v8f16 (X86any_VSintToFP (v2i64 (X86VBroadcastld64 addr:$src)))), 13438 (VCVTQQ2PHZ128rmb addr:$src)>; 13439 def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)), 13440 (v8f16 VR128X:$src0), VK2WM:$mask), 13441 (VCVTQQ2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>; 13442 def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)), 13443 v8f16x_info.ImmAllZerosV, VK2WM:$mask), 13444 (VCVTQQ2PHZ128rmbkz VK2WM:$mask, addr:$src)>; 13445 13446 // Special patterns to allow use of X86VMUintToFP for masking. Instruction 13447 // patterns have been disabled with null_frag. 13448 def : Pat<(v8f16 (X86any_VUintToFP (v4i64 VR256X:$src))), 13449 (VCVTUQQ2PHZ256rr VR256X:$src)>; 13450 def : Pat<(X86VMUintToFP (v4i64 VR256X:$src), (v8f16 VR128X:$src0), 13451 VK4WM:$mask), 13452 (VCVTUQQ2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>; 13453 def : Pat<(X86VMUintToFP (v4i64 VR256X:$src), v8f16x_info.ImmAllZerosV, 13454 VK4WM:$mask), 13455 (VCVTUQQ2PHZ256rrkz VK4WM:$mask, VR256X:$src)>; 13456 13457 def : Pat<(v8f16 (X86any_VUintToFP (loadv4i64 addr:$src))), 13458 (VCVTUQQ2PHZ256rm addr:$src)>; 13459 def : Pat<(X86VMUintToFP (loadv4i64 addr:$src), (v8f16 VR128X:$src0), 13460 VK4WM:$mask), 13461 (VCVTUQQ2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>; 13462 def : Pat<(X86VMUintToFP (loadv4i64 addr:$src), v8f16x_info.ImmAllZerosV, 13463 VK4WM:$mask), 13464 (VCVTUQQ2PHZ256rmkz VK4WM:$mask, addr:$src)>; 13465 13466 def : Pat<(v8f16 (X86any_VUintToFP (v4i64 (X86VBroadcastld64 addr:$src)))), 13467 (VCVTUQQ2PHZ256rmb addr:$src)>; 13468 def : Pat<(X86VMUintToFP (v4i64 (X86VBroadcastld64 addr:$src)), 13469 (v8f16 VR128X:$src0), VK4WM:$mask), 13470 (VCVTUQQ2PHZ256rmbk VR128X:$src0, VK4WM:$mask, 
addr:$src)>; 13471 def : Pat<(X86VMUintToFP (v4i64 (X86VBroadcastld64 addr:$src)), 13472 v8f16x_info.ImmAllZerosV, VK4WM:$mask), 13473 (VCVTUQQ2PHZ256rmbkz VK4WM:$mask, addr:$src)>; 13474 13475 def : Pat<(v8f16 (X86any_VUintToFP (v2i64 VR128X:$src))), 13476 (VCVTUQQ2PHZ128rr VR128X:$src)>; 13477 def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), (v8f16 VR128X:$src0), 13478 VK2WM:$mask), 13479 (VCVTUQQ2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>; 13480 def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), v8f16x_info.ImmAllZerosV, 13481 VK2WM:$mask), 13482 (VCVTUQQ2PHZ128rrkz VK2WM:$mask, VR128X:$src)>; 13483 13484 def : Pat<(v8f16 (X86any_VUintToFP (loadv2i64 addr:$src))), 13485 (VCVTUQQ2PHZ128rm addr:$src)>; 13486 def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), (v8f16 VR128X:$src0), 13487 VK2WM:$mask), 13488 (VCVTUQQ2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 13489 def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), v8f16x_info.ImmAllZerosV, 13490 VK2WM:$mask), 13491 (VCVTUQQ2PHZ128rmkz VK2WM:$mask, addr:$src)>; 13492 13493 def : Pat<(v8f16 (X86any_VUintToFP (v2i64 (X86VBroadcastld64 addr:$src)))), 13494 (VCVTUQQ2PHZ128rmb addr:$src)>; 13495 def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)), 13496 (v8f16 VR128X:$src0), VK2WM:$mask), 13497 (VCVTUQQ2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>; 13498 def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)), 13499 v8f16x_info.ImmAllZerosV, VK2WM:$mask), 13500 (VCVTUQQ2PHZ128rmbkz VK2WM:$mask, addr:$src)>; 13501} 13502 13503let Constraints = "@earlyclobber $dst, $src1 = $dst" in { 13504 multiclass avx512_cfmaop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _, bit IsCommutable> { 13505 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), 13506 (ins _.RC:$src2, _.RC:$src3), 13507 OpcodeStr, "$src3, $src2", "$src2, $src3", 13508 (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), IsCommutable>, EVEX_4V; 13509 13510 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs 
_.RC:$dst), 13511 (ins _.RC:$src2, _.MemOp:$src3), 13512 OpcodeStr, "$src3, $src2", "$src2, $src3", 13513 (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>, EVEX_4V; 13514 13515 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 13516 (ins _.RC:$src2, _.ScalarMemOp:$src3), 13517 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), !strconcat("$src2, ${src3}", _.BroadcastStr), 13518 (_.VT (OpNode _.RC:$src2, (_.VT (_.BroadcastLdFrag addr:$src3)), _.RC:$src1))>, EVEX_B, EVEX_4V; 13519 } 13520} // Constraints = "@earlyclobber $dst, $src1 = $dst" 13521 13522multiclass avx512_cfmaop_round<bits<8> opc, string OpcodeStr, SDNode OpNode, 13523 X86VectorVTInfo _> { 13524 let Constraints = "@earlyclobber $dst, $src1 = $dst" in 13525 defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), 13526 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc), 13527 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", 13528 (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc)))>, 13529 EVEX_4V, EVEX_B, EVEX_RC; 13530} 13531 13532 13533multiclass avx512_cfmaop_common<bits<8> opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd, bit IsCommutable> { 13534 let Predicates = [HasFP16] in { 13535 defm Z : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v16f32_info, IsCommutable>, 13536 avx512_cfmaop_round<opc, OpcodeStr, OpNodeRnd, v16f32_info>, 13537 EVEX_V512, Sched<[WriteFMAZ]>; 13538 } 13539 let Predicates = [HasVLX, HasFP16] in { 13540 defm Z256 : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v8f32x_info, IsCommutable>, EVEX_V256, Sched<[WriteFMAY]>; 13541 defm Z128 : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v4f32x_info, IsCommutable>, EVEX_V128, Sched<[WriteFMAX]>; 13542 } 13543} 13544 13545multiclass avx512_cfmulop_common<bits<8> opc, string OpcodeStr, SDNode OpNode, 13546 SDNode MaskOpNode, SDNode OpNodeRnd, bit IsCommutable> { 13547 let Predicates = [HasFP16] in { 13548 defm Z : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f32_info, 13549 
WriteFMAZ, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>, 13550 avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, WriteFMAZ, v16f32_info, 13551 "", "@earlyclobber $dst">, EVEX_V512; 13552 } 13553 let Predicates = [HasVLX, HasFP16] in { 13554 defm Z256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f32x_info, 13555 WriteFMAY, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>, EVEX_V256; 13556 defm Z128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f32x_info, 13557 WriteFMAX, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>, EVEX_V128; 13558 } 13559} 13560 13561 13562let Uses = [MXCSR] in { 13563 defm VFMADDCPH : avx512_cfmaop_common<0x56, "vfmaddcph", x86vfmaddc, x86vfmaddcRnd, 1>, 13564 T_MAP6XS, EVEX_CD8<32, CD8VF>; 13565 defm VFCMADDCPH : avx512_cfmaop_common<0x56, "vfcmaddcph", x86vfcmaddc, x86vfcmaddcRnd, 0>, 13566 T_MAP6XD, EVEX_CD8<32, CD8VF>; 13567 13568 defm VFMULCPH : avx512_cfmulop_common<0xD6, "vfmulcph", x86vfmulc, x86vfmulc, 13569 x86vfmulcRnd, 1>, T_MAP6XS, EVEX_CD8<32, CD8VF>; 13570 defm VFCMULCPH : avx512_cfmulop_common<0xD6, "vfcmulcph", x86vfcmulc, 13571 x86vfcmulc, x86vfcmulcRnd, 0>, T_MAP6XD, EVEX_CD8<32, CD8VF>; 13572} 13573 13574 13575multiclass avx512_cfmaop_sh_common<bits<8> opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd, 13576 bit IsCommutable> { 13577 let Predicates = [HasFP16], Constraints = "@earlyclobber $dst, $src1 = $dst" in { 13578 defm r : AVX512_maskable_3src<opc, MRMSrcReg, v4f32x_info, (outs VR128X:$dst), 13579 (ins VR128X:$src2, VR128X:$src3), OpcodeStr, 13580 "$src3, $src2", "$src2, $src3", 13581 (v4f32 (OpNode VR128X:$src2, VR128X:$src3, VR128X:$src1)), IsCommutable>, 13582 Sched<[WriteFMAX]>; 13583 defm m : AVX512_maskable_3src<opc, MRMSrcMem, v4f32x_info, (outs VR128X:$dst), 13584 (ins VR128X:$src2, ssmem:$src3), OpcodeStr, 13585 "$src3, $src2", "$src2, $src3", 13586 (v4f32 (OpNode VR128X:$src2, (sse_load_f32 addr:$src3), VR128X:$src1))>, 13587 Sched<[WriteFMAX.Folded, 
WriteFMAX.ReadAfterFold]>; 13588 defm rb : AVX512_maskable_3src<opc, MRMSrcReg, v4f32x_info, (outs VR128X:$dst), 13589 (ins VR128X:$src2, VR128X:$src3, AVX512RC:$rc), OpcodeStr, 13590 "$rc, $src3, $src2", "$src2, $src3, $rc", 13591 (v4f32 (OpNodeRnd VR128X:$src2, VR128X:$src3, VR128X:$src1, (i32 timm:$rc)))>, 13592 EVEX_B, EVEX_RC, Sched<[WriteFMAX]>; 13593 } 13594} 13595 13596multiclass avx512_cfmbinop_sh_common<bits<8> opc, string OpcodeStr, SDNode OpNode, 13597 SDNode OpNodeRnd, bit IsCommutable> { 13598 let Predicates = [HasFP16] in { 13599 defm rr : AVX512_maskable<opc, MRMSrcReg, f32x_info, (outs VR128X:$dst), 13600 (ins VR128X:$src1, VR128X:$src2), OpcodeStr, 13601 "$src2, $src1", "$src1, $src2", 13602 (v4f32 (OpNode VR128X:$src1, VR128X:$src2)), 13603 IsCommutable, IsCommutable, IsCommutable, 13604 X86selects, "@earlyclobber $dst">, Sched<[WriteFMAX]>; 13605 defm rm : AVX512_maskable<opc, MRMSrcMem, f32x_info, (outs VR128X:$dst), 13606 (ins VR128X:$src1, ssmem:$src2), OpcodeStr, 13607 "$src2, $src1", "$src1, $src2", 13608 (v4f32 (OpNode VR128X:$src1, (sse_load_f32 addr:$src2))), 13609 0, 0, 0, X86selects, "@earlyclobber $dst">, 13610 Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>; 13611 defm rrb : AVX512_maskable<opc, MRMSrcReg, f32x_info, (outs VR128X:$dst), 13612 (ins VR128X:$src1, VR128X:$src2, AVX512RC:$rc), OpcodeStr, 13613 "$rc, $src2, $src1", "$src1, $src2, $rc", 13614 (OpNodeRnd (v4f32 VR128X:$src1), (v4f32 VR128X:$src2), (i32 timm:$rc)), 13615 0, 0, 0, X86selects, "@earlyclobber $dst">, 13616 EVEX_B, EVEX_RC, Sched<[WriteFMAX]>; 13617 } 13618} 13619 13620let Uses = [MXCSR] in { 13621 defm VFMADDCSHZ : avx512_cfmaop_sh_common<0x57, "vfmaddcsh", x86vfmaddcSh, x86vfmaddcShRnd, 1>, 13622 T_MAP6XS, EVEX_CD8<32, CD8VT1>, EVEX_V128, EVEX_4V; 13623 defm VFCMADDCSHZ : avx512_cfmaop_sh_common<0x57, "vfcmaddcsh", x86vfcmaddcSh, x86vfcmaddcShRnd, 0>, 13624 T_MAP6XD, EVEX_CD8<32, CD8VT1>, EVEX_V128, EVEX_4V; 13625 13626 defm VFMULCSHZ : 
avx512_cfmbinop_sh_common<0xD7, "vfmulcsh", x86vfmulcSh, x86vfmulcShRnd, 1>, 13627 T_MAP6XS, EVEX_CD8<32, CD8VT1>, EVEX_V128, VEX_LIG, EVEX_4V; 13628 defm VFCMULCSHZ : avx512_cfmbinop_sh_common<0xD7, "vfcmulcsh", x86vfcmulcSh, x86vfcmulcShRnd, 0>, 13629 T_MAP6XD, EVEX_CD8<32, CD8VT1>, EVEX_V128, VEX_LIG, EVEX_4V; 13630} 13631